Data
We load and scale the data.
Code
data_clean <- read_sav ("data/Final_survey.sav" )
data_scale <- data_clean %>% dplyr:: select (- Einnummer, - Cluster, - Date, - Timepoint, - Meetweek) %>%
mutate (Gender = factor (Gender) %>% fct_recode ("Male" = "1" , "Female" = "2" )) %>%
mutate (PICA = factor (PICA) %>% fct_recode ("Yes" = "1" , "No" = "0" )) %>%
mutate (RLS = factor (case_when (RLS == 2 ~ 1 , RLS == 1 ~ 0 )) %>% fct_recode ("Yes" = "1" , "No" = "0" )) %>%
mutate (Intervention_months_factor = factor (case_when (Intervention_months_s < 6 ~ 0 , Intervention_months_s >= 6 & Intervention_months_s < 12 ~ 1 , Intervention_months_s >= 12 & Intervention_months_s < 18 ~ 2 , Intervention_months_s >= 18 & Intervention_months_s < 24 ~ 3 ,Intervention_months_s >= 24 ~ 4 ))) %>%
mutate (Fergroup = factor (case_when (Ferritin < 15 ~ 1 , Ferritin >= 15 & Ferritin <= 30 ~ 2 , Ferritin > 30 ~ 3 )) %>% fct_recode ("Ferritin < 15" = "1" , "Ferritin 15-30" = "2" , "Ferritin > 30" = "3" )) %>%
mutate (Warmglow = as.numeric (Warmglow)) %>% mutate (Warmglow = as.factor (case_when (Warmglow >= 6 ~ 1 , TRUE & ! is.na (Warmglow) ~ 0 )) %>% fct_recode ("Yes" = "1" , "No" = "0" ))
data_scale$ Age <- scale (data_scale$ Age, center= TRUE , scale = FALSE )
data_scale$ Weight <- scale (data_scale$ Weight, center= TRUE , scale = FALSE )
data_scale$ Height <- scale (data_scale$ Height, center= TRUE , scale = FALSE )
Descriptives
We present a descriptives table for the outcomes we will analyse, including:
PICA
Restless legs syndrome (RLS)
Cognitive functioning (CFQ)
Physical health (SF_ph)
Mental health (SF_mh)
Warm glow
Fatigue (CIS)
Code
#| label: t1 premenopausal females
data_t1 <- data_scale %>% mutate (stap2 = as.factor (stap), stap2 = recode_factor (stap2, "2.1" = "2" ,"2.2" = "2" , "3.1" = "3" , "3.2" = "3" ), stap2 = factor (stap2,levels = c ("1" , "2" , "3" , "4" )))
prefemalest1<- table1:: table1 (~ Age + Hb + Ferritin + factor (PICA) + factor (RLS) + CFQ_Total + SF_ph + SF_mh + Warmglow + CIS_Totalmean | factor (stap2), data = subset (data_t1, data_t1$ Gender== "Female" & data_t1$ PostMeno== 0 ), caption = "Premenopausal females" , render.continuous= c (.= "Mean (SD)" , .= "Median [Q1, Q3]" ), render.categorical= c (.= "FREQ (PCT)" ))
prefemalest1
Premenopausal females
1(N=656)
2(N=675)
3(N=657)
4(N=460)
Overall(N=2448)
Age
Mean (SD)
-13.3 (7.51)
-13.6 (7.75)
-13.7 (7.48)
-14.0 (7.72)
-13.6 (7.60)
Median [Q1, Q3]
-15.0 [-19.0, -8.02]
-16.0 [-20.0, -8.02]
-16.0 [-20.0, -8.02]
-16.0 [-20.0, -9.02]
-16.0 [-20.0, -8.02]
Hb
Mean (SD)
9.96 (38.7)
23.1 (120)
11.4 (54.6)
10.5 (46.2)
14.1 (74.7)
Median [Q1, Q3]
8.40 [8.00, 8.80]
8.40 [8.00, 8.70]
8.30 [8.10, 8.80]
8.40 [8.00, 8.70]
8.40 [8.00, 8.70]
Ferritin
Mean (SD)
33.2 (30.3)
30.3 (30.5)
30.2 (32.0)
28.5 (26.4)
30.7 (30.2)
Median [Q1, Q3]
24.2 [13.5, 43.6]
22.6 [12.0, 38.5]
20.6 [11.7, 39.3]
22.0 [11.1, 38.9]
22.7 [12.1, 39.9]
Missing
29 (4.4%)
47 (7.0%)
35 (5.3%)
41 (8.9%)
152 (6.2%)
factor(PICA)
No
607 (92.5)
635 (94.1)
623 (94.8)
427 (92.8)
2292 (93.6)
Yes
11 (1.7)
13 (1.9)
12 (1.8)
18 (3.9)
54 (2.2)
Missing
38 (5.8%)
27 (4.0%)
22 (3.3%)
15 (3.3%)
102 (4.2%)
factor(RLS)
No
539 (82.2)
565 (83.7)
538 (81.9)
395 (85.9)
2037 (83.2)
Yes
39 (5.9)
46 (6.8)
43 (6.5)
23 (5.0)
151 (6.2)
Missing
78 (11.9%)
64 (9.5%)
76 (11.6%)
42 (9.1%)
260 (10.6%)
CFQ_Total
Mean (SD)
31.7 (14.0)
29.4 (12.6)
31.3 (12.8)
32.1 (13.5)
31.0 (13.3)
Median [Q1, Q3]
30.0 [23.0, 40.0]
28.0 [21.0, 37.0]
30.0 [23.0, 39.0]
31.0 [23.0, 40.0]
30.0 [22.0, 39.0]
Missing
37 (5.6%)
30 (4.4%)
22 (3.3%)
16 (3.5%)
105 (4.3%)
SF_ph
Mean (SD)
89.5 (9.60)
90.3 (9.89)
89.4 (10.6)
89.7 (9.33)
89.7 (9.91)
Median [Q1, Q3]
92.4 [86.1, 96.2]
92.5 [87.5, 96.3]
92.5 [86.9, 96.2]
92.4 [86.2, 96.2]
92.5 [86.8, 96.3]
Missing
22 (3.4%)
8 (1.2%)
10 (1.5%)
9 (2.0%)
49 (2.0%)
SF_mh
Mean (SD)
76.6 (14.1)
76.6 (15.1)
76.4 (14.9)
74.4 (16.5)
76.1 (15.1)
Median [Q1, Q3]
81.3 [72.3, 86.3]
81.5 [72.2, 86.5]
81.4 [73.4, 86.0]
80.5 [68.9, 85.4]
81.3 [72.1, 86.3]
Missing
19 (2.9%)
7 (1.0%)
9 (1.4%)
8 (1.7%)
43 (1.8%)
Warmglow
No
251 (38.3)
262 (38.8)
282 (42.9)
223 (48.5)
1018 (41.6)
Yes
312 (47.6)
341 (50.5)
304 (46.3)
193 (42.0)
1150 (47.0)
Missing
93 (14.2%)
72 (10.7%)
71 (10.8%)
44 (9.6%)
280 (11.4%)
CIS_Totalmean
Mean (SD)
67.1 (21.5)
67.9 (21.1)
66.9 (21.1)
68.3 (20.3)
67.5 (21.1)
Median [Q1, Q3]
76.0 [47.0, 84.0]
76.0 [48.9, 84.0]
76.0 [48.0, 84.0]
76.0 [52.0, 84.0]
76.0 [48.0, 84.0]
Missing
35 (5.3%)
23 (3.4%)
18 (2.7%)
11 (2.4%)
87 (3.6%)
Code
postfemalest1<- table1:: table1 (~ Age + Hb + Ferritin + factor (PICA) + factor (RLS) + CFQ_Total + SF_ph + SF_mh + Warmglow + CIS_Totalmean | factor (stap2), data = subset (data_t1, data_t1$ Gender== "Female" & data_t1$ PostMeno== 1 ), caption = "Postmenopausal females" , render.continuous= c (.= "Mean (SD)" , .= "Median [Q1, Q3]" ), render.categorical= c (.= "FREQ (PCT)" ))
postfemalest1
Postmenopausal females
1(N=352)
2(N=379)
3(N=456)
4(N=304)
Overall(N=1491)
Age
Mean (SD)
13.5 (6.76)
13.9 (6.82)
13.6 (6.56)
13.8 (6.91)
13.7 (6.74)
Median [Q1, Q3]
13.0 [7.98, 19.0]
13.0 [7.98, 18.5]
13.0 [7.98, 19.0]
13.0 [7.98, 19.2]
13.0 [7.98, 19.0]
Hb
Mean (SD)
8.56 (0.535)
19.0 (101)
17.2 (92.5)
8.48 (0.494)
13.8 (72.4)
Median [Q1, Q3]
8.50 [8.20, 8.90]
8.40 [8.10, 8.80]
8.50 [8.10, 8.90]
8.40 [8.10, 8.80]
8.50 [8.10, 8.90]
Ferritin
Mean (SD)
35.7 (28.5)
33.7 (26.6)
35.0 (35.3)
34.3 (30.6)
34.7 (30.6)
Median [Q1, Q3]
29.5 [17.7, 43.1]
27.1 [16.5, 44.3]
25.4 [13.4, 43.7]
26.1 [14.8, 44.0]
27.0 [15.3, 44.0]
Missing
10 (2.8%)
19 (5.0%)
37 (8.1%)
15 (4.9%)
81 (5.4%)
factor(PICA)
No
338 (96.0)
365 (96.3)
442 (96.9)
296 (97.4)
1441 (96.6)
Yes
8 (2.3)
6 (1.6)
6 (1.3)
5 (1.6)
25 (1.7)
Missing
6 (1.7%)
8 (2.1%)
8 (1.8%)
3 (1.0%)
25 (1.7%)
factor(RLS)
No
282 (80.1)
312 (82.3)
348 (76.3)
239 (78.6)
1181 (79.2)
Yes
25 (7.1)
26 (6.9)
57 (12.5)
34 (11.2)
142 (9.5)
Missing
45 (12.8%)
41 (10.8%)
51 (11.2%)
31 (10.2%)
168 (11.3%)
CFQ_Total
Mean (SD)
26.5 (11.8)
26.4 (11.1)
25.6 (10.3)
26.4 (12.1)
26.2 (11.2)
Median [Q1, Q3]
26.0 [18.0, 34.0]
26.0 [18.0, 34.0]
25.0 [19.0, 33.0]
25.0 [17.0, 34.0]
25.0 [18.0, 34.0]
Missing
5 (1.4%)
6 (1.6%)
11 (2.4%)
4 (1.3%)
26 (1.7%)
SF_ph
Mean (SD)
88.4 (11.7)
87.6 (12.2)
88.9 (10.5)
88.9 (10.0)
88.5 (11.2)
Median [Q1, Q3]
91.3 [85.0, 96.3]
91.3 [84.3, 95.0]
91.8 [86.1, 95.0]
91.2 [86.3, 95.0]
91.3 [85.6, 95.0]
Missing
6 (1.7%)
6 (1.6%)
7 (1.5%)
3 (1.0%)
22 (1.5%)
SF_mh
Mean (SD)
81.4 (12.6)
81.1 (11.4)
81.3 (11.7)
80.5 (13.0)
81.1 (12.1)
Median [Q1, Q3]
85.0 [78.6, 88.8]
84.3 [77.9, 88.8]
84.5 [78.6, 88.5]
83.8 [78.3, 88.2]
84.3 [78.4, 88.5]
Missing
4 (1.1%)
3 (0.8%)
5 (1.1%)
2 (0.7%)
14 (0.9%)
Warmglow
No
124 (35.2)
127 (33.5)
140 (30.7)
103 (33.9)
494 (33.1)
Yes
209 (59.4)
223 (58.8)
290 (63.6)
188 (61.8)
910 (61.0)
Missing
19 (5.4%)
29 (7.7%)
26 (5.7%)
13 (4.3%)
87 (5.8%)
CIS_Totalmean
Mean (SD)
60.5 (27.0)
63.5 (26.4)
64.4 (26.2)
65.8 (25.5)
63.5 (26.3)
Median [Q1, Q3]
68.0 [33.0, 85.0]
74.5 [35.8, 87.0]
77.0 [37.8, 87.0]
79.0 [39.0, 87.0]
75.0 [36.0, 86.8]
Missing
4 (1.1%)
5 (1.3%)
8 (1.8%)
2 (0.7%)
19 (1.3%)
Code
malest1<- table1:: table1 (~ Age + Hb + Ferritin + factor (PICA) + factor (RLS) + CFQ_Total + SF_ph + SF_mh + Warmglow + CIS_Totalmean | factor (stap2), data = subset (data_t1, data_t1$ Gender== "Male" ), caption = "Males" , render.continuous= c (.= "Mean (SD)" , .= "Median [Q1, Q3]" ), render.categorical= c (.= "FREQ (PCT)" ))
malest1
Males
1(N=848)
2(N=925)
3(N=1073)
4(N=788)
Overall(N=3634)
Age
Mean (SD)
3.23 (14.8)
3.26 (15.2)
3.26 (15.2)
4.70 (15.5)
3.56 (15.2)
Median [Q1, Q3]
4.98 [-10.0, 16.0]
4.98 [-11.0, 17.0]
4.98 [-10.0, 16.0]
6.98 [-10.0, 18.0]
5.98 [-10.0, 17.0]
Hb
Mean (SD)
11.7 (48.0)
20.1 (102)
12.1 (52.3)
14.3 (70.4)
14.5 (71.4)
Median [Q1, Q3]
9.40 [8.90, 9.80]
9.30 [8.90, 9.80]
9.20 [8.80, 9.70]
9.20 [8.70, 9.70]
9.30 [8.90, 9.70]
Ferritin
Mean (SD)
58.5 (70.4)
49.7 (50.3)
44.0 (51.7)
44.7 (66.4)
49.0 (59.8)
Median [Q1, Q3]
39.3 [24.4, 68.7]
34.0 [18.9, 59.6]
27.8 [13.8, 51.9]
25.8 [12.6, 52.1]
32.0 [16.9, 57.7]
Missing
27 (3.2%)
44 (4.8%)
37 (3.4%)
60 (7.6%)
168 (4.6%)
factor(PICA)
No
811 (95.6)
881 (95.2)
1011 (94.2)
747 (94.8)
3450 (94.9)
Yes
22 (2.6)
24 (2.6)
27 (2.5)
22 (2.8)
95 (2.6)
Missing
15 (1.8%)
20 (2.2%)
35 (3.3%)
19 (2.4%)
89 (2.4%)
factor(RLS)
No
754 (88.9)
810 (87.6)
936 (87.2)
673 (85.4)
3173 (87.3)
Yes
39 (4.6)
44 (4.8)
46 (4.3)
57 (7.2)
186 (5.1)
Missing
55 (6.5%)
71 (7.7%)
91 (8.5%)
58 (7.4%)
275 (7.6%)
CFQ_Total
Mean (SD)
24.8 (11.9)
24.2 (11.4)
24.7 (11.4)
24.2 (11.4)
24.5 (11.5)
Median [Q1, Q3]
24.0 [16.0, 32.0]
24.0 [16.0, 31.0]
24.0 [17.0, 31.0]
24.0 [17.0, 31.0]
24.0 [17.0, 31.0]
Missing
19 (2.2%)
22 (2.4%)
32 (3.0%)
27 (3.4%)
100 (2.8%)
SF_ph
Mean (SD)
90.8 (8.95)
90.9 (9.13)
91.4 (7.69)
90.7 (9.34)
91.0 (8.73)
Median [Q1, Q3]
92.5 [88.7, 96.3]
93.7 [88.7, 96.3]
92.5 [88.8, 96.3]
92.5 [87.5, 96.3]
92.5 [88.7, 96.3]
Missing
12 (1.4%)
14 (1.5%)
15 (1.4%)
11 (1.4%)
52 (1.4%)
SF_mh
Mean (SD)
81.7 (12.1)
82.0 (12.1)
81.6 (12.7)
82.2 (11.6)
81.9 (12.2)
Median [Q1, Q3]
85.0 [79.3, 88.8]
85.3 [79.5, 89.3]
85.3 [79.5, 88.8]
85.3 [79.5, 89.3]
85.3 [79.5, 88.8]
Missing
8 (0.9%)
11 (1.2%)
11 (1.0%)
9 (1.1%)
39 (1.1%)
Warmglow
No
307 (36.2)
318 (34.4)
390 (36.3)
283 (35.9)
1298 (35.7)
Yes
478 (56.4)
552 (59.7)
609 (56.8)
449 (57.0)
2088 (57.5)
Missing
63 (7.4%)
55 (5.9%)
74 (6.9%)
56 (7.1%)
248 (6.8%)
CIS_Totalmean
Mean (SD)
68.5 (23.8)
66.0 (24.5)
65.3 (24.9)
67.3 (24.7)
66.7 (24.5)
Median [Q1, Q3]
80.0 [45.0, 86.0]
78.0 [42.0, 86.0]
78.0 [40.0, 86.0]
80.0 [42.0, 87.0]
79.0 [42.0, 86.0]
Missing
15 (1.8%)
17 (1.8%)
26 (2.4%)
18 (2.3%)
76 (2.1%)
Analysis
PICA
PICA is diagnosed when donor answers yes to: do you crave and regularly eat non-food items such as ice, clay, mud, sand, raw pasta, chalk or charcoal? (translated)
Models
Code
PICA_fitM <- glm (formula = PICA ~ Age + Weight + Height + Intervention_months_factor, data = data_scale, subset = data_scale$ Gender == "Male" ,family = binomial)
PICA_fit_preF <- glm (formula = PICA ~ Age + Weight + Height + Intervention_months_factor, data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 0 ,family = binomial)
PICA_fit_postF <- glm (formula = PICA ~ Age + Weight + Height + Intervention_months_factor, data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 1 ,family = binomial)
pl <- c (
` (Intercept) ` = "Intercept" ,
Intervention_months_factor0 = "0-5 months" ,
Intervention_months_factor1 = "6-11 months" ,
Intervention_months_factor2 = "12-17 months" ,
Intervention_months_factor3 = "18-23 months" ,
Intervention_months_factor4 = "24+ months"
)
tab_model (PICA_fit_preF, PICA_fit_postF, PICA_fitM, pred.labels = pl, dv.labels = c ("Pre-menopausal women" , "Post-menopausal women" , "Men" ), show.reflvl = T, title = "PICA" )
PICA
Pre-menopausal women
Post-menopausal women
Men
Predictors
Odds Ratios
CI
p
Odds Ratios
CI
p
Odds Ratios
CI
p
Intercept
0.01
0.00 – 0.02
<0.001
0.02
0.01 – 0.05
<0.001
0.03
0.02 – 0.04
<0.001
Age
0.93
0.88 – 0.97
0.001
0.99
0.93 – 1.05
0.694
1.00
0.98 – 1.01
0.645
Weight
1.03
1.00 – 1.05
0.016
1.03
0.99 – 1.06
0.128
1.02
1.01 – 1.04
0.007
Height
0.98
0.93 – 1.02
0.284
0.98
0.92 – 1.05
0.546
0.98
0.95 – 1.01
0.219
6-11 months
1.06
0.48 – 2.18
0.885
0.53
0.08 – 2.05
0.421
0.68
0.32 – 1.31
0.280
12-17 months
0.75
0.29 – 1.67
0.502
1.66
0.60 – 4.38
0.308
1.00
0.55 – 1.72
0.993
18-23 months
1.28
0.50 – 2.88
0.571
1.62
0.25 – 6.31
0.538
1.11
0.48 – 2.25
0.794
24+ months
0.80
0.29 – 1.86
0.629
1.93
0.52 – 5.87
0.275
0.91
0.47 – 1.65
0.766
Observations
2345
1465
3545
R2 Tjur
0.009
0.004
0.003
Model assumptions
Premenopausal females:
Code
plot (PICA_fit_preF, which = 4 , id.n = 3 )
Code
#Inspect the largest:
model.data <- augment (PICA_fit_preF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd)
# A tibble: 3 × 13
.rownames PICA Age[,1] Weigh…¹ Heigh…² Inter…³ .fitted .resid .std.…⁴ .hat
<chr> <fct> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 3788 Yes -6.02 -11.2 -20.8 4 -4.38 2.96 2.97 0.00410
2 4453 Yes 1.98 -0.189 -13.8 4 -4.88 3.13 3.13 0.00275
3 7722 Yes -21.0 36.8 -1.76 3 -1.98 2.06 2.10 0.0395
# … with 3 more variables: .sigma <dbl>, .cooksd <dbl>, index <int>, and
# abbreviated variable names ¹Weight[,1], ²Height[,1],
# ³Intervention_months_factor, ⁴.std.resid
Code
#Plot standardized residuals:
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = PICA), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 ) # 4 influential data points
# A tibble: 4 × 13
.rowna…¹ PICA Age[,1] Weigh…² Heigh…³ Inter…⁴ .fitted .resid .std.…⁵ .hat
<chr> <fct> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 3642 Yes -11.0 -23.2 -8.76 4 -4.61 3.04 3.04 0.00219
2 4028 Yes -0.0154 -10.2 -3.76 3 -4.76 3.09 3.09 0.00261
3 4453 Yes 1.98 -0.189 -13.8 4 -4.88 3.13 3.13 0.00275
4 7651 Yes 0.985 -20.2 -14.8 0 -5.07 3.19 3.19 0.00171
# … with 3 more variables: .sigma <dbl>, .cooksd <dbl>, index <int>, and
# abbreviated variable names ¹.rownames, ²Weight[,1], ³Height[,1],
# ⁴Intervention_months_factor, ⁵.std.resid
Code
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
n <- nrow (data_scale)
cooksd = cooks.distance (PICA_fit_preF)
plot (cooksd, main= "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# influence plot
car:: influencePlot (PICA_fit_preF,col= "red" ,id.n= 3 )
StudRes Hat CookD
3788 3.0187720 0.004100397 0.041247398
4453 3.1823811 0.002745063 0.045224283
5873 -0.6309422 0.073630863 0.002253238
7651 3.2299332 0.001706341 0.034185759
7722 2.1271122 0.039465735 0.038884139
Code
car:: influenceIndexPlot (PICA_fit_preF, id.n= 3 )
Code
# check without some outliers
data_scale2 <- data_scale[c (- 4453 , - 3788 , - 7722 ), ]
PICA_fit_preF_upd1 <- update (PICA_fit_preF, data = data_scale2, subset= data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 0 )
car:: compareCoefs (PICA_fit_preF,PICA_fit_preF_upd1) # no improvement
Calls:
1: glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor,
family = binomial, data = data_scale, subset = data_scale$Gender ==
"Female" & data_scale$PostMeno == 0)
2: glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor,
family = binomial, data = data_scale2, subset = data_scale2$Gender ==
"Female" & data_scale2$PostMeno == 0)
Model 1 Model 2
(Intercept) -4.838 -4.978
SE 0.449 0.480
Age -0.0762 -0.0873
SE 0.0226 0.0243
Weight 0.0261 0.0210
SE 0.0109 0.0117
Height -0.0249 -0.0110
SE 0.0232 0.0238
Intervention_months_factor1 0.0552 0.0624
SE 0.3830 0.3830
Intervention_months_factor2 -0.293 -0.284
SE 0.435 0.436
Intervention_months_factor3 0.2481 0.0957
SE 0.4382 0.4646
Intervention_months_factor4 -0.224 -0.628
SE 0.463 0.546
Code
Call:
glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor,
family = binomial, data = data_scale, subset = data_scale$Gender ==
"Female" & data_scale$PostMeno == 0)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.6176 -0.2450 -0.2033 -0.1570 3.1874
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -4.83824 0.44894 -10.777 < 2e-16 ***
Age -0.07616 0.02259 -3.372 0.000746 ***
Weight 0.02613 0.01088 2.402 0.016300 *
Height -0.02488 0.02323 -1.071 0.284121
Intervention_months_factor1 0.05523 0.38299 0.144 0.885335
Intervention_months_factor2 -0.29262 0.43547 -0.672 0.501609
Intervention_months_factor3 0.24805 0.43821 0.566 0.571356
Intervention_months_factor4 -0.22381 0.46300 -0.483 0.628810
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 514.02 on 2344 degrees of freedom
Residual deviance: 496.47 on 2337 degrees of freedom
(685 observations deleted due to missingness)
AIC: 512.47
Number of Fisher Scoring iterations: 7
Code
summary (PICA_fit_preF_upd1)
Call:
glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor,
family = binomial, data = data_scale2, subset = data_scale2$Gender ==
"Female" & data_scale2$PostMeno == 0)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.5675 -0.2446 -0.1964 -0.1426 3.2650
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -4.97806 0.48015 -10.368 < 2e-16 ***
Age -0.08729 0.02435 -3.586 0.000336 ***
Weight 0.02095 0.01169 1.792 0.073148 .
Height -0.01095 0.02381 -0.460 0.645551
Intervention_months_factor1 0.06238 0.38298 0.163 0.870616
Intervention_months_factor2 -0.28433 0.43556 -0.653 0.513890
Intervention_months_factor3 0.09570 0.46465 0.206 0.836825
Intervention_months_factor4 -0.62845 0.54566 -1.152 0.249435
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 491.23 on 2341 degrees of freedom
Residual deviance: 472.08 on 2334 degrees of freedom
(685 observations deleted due to missingness)
AIC: 488.08
Number of Fisher Scoring iterations: 7
Code
#multicollinearity
car:: vif (PICA_fit_preF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.056045 1 1.027641
Weight 1.163751 1 1.078773
Height 1.111582 1 1.054316
Intervention_months_factor 1.006976 4 1.000869
Postmenopausal females:
Code
plot (PICA_fit_postF, which = 4 , id.n = 3 )
Code
#Inspect the largest:
model.data <- augment (PICA_fit_postF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd)
# A tibble: 3 × 13
.rownames PICA Age[,1] Weigh…¹ Heigh…² Inter…³ .fitted .resid .std.…⁴ .hat
<chr> <fct> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 1968 Yes 22.0 6.81 -6.76 1 -4.69 3.06 3.07 0.00540
2 3889 Yes 17.0 -19.2 -18.8 3 -3.92 2.81 2.83 0.0121
3 7354 Yes 25.0 -1.19 -3.76 1 -4.98 3.16 3.17 0.00450
# … with 3 more variables: .sigma <dbl>, .cooksd <dbl>, index <int>, and
# abbreviated variable names ¹Weight[,1], ²Height[,1],
# ³Intervention_months_factor, ⁴.std.resid
Code
#Plot standardized residuals:
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = PICA), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 ) # 4 influential data points
# A tibble: 3 × 13
.rownames PICA Age[,1] Weigh…¹ Heigh…² Inter…³ .fitted .resid .std.…⁴ .hat
<chr> <fct> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 1968 Yes 22.0 6.81 -6.76 1 -4.69 3.06 3.07 0.00540
2 6560 Yes 27.0 -9.19 -7.76 0 -4.50 3.00 3.01 0.00312
3 7354 Yes 25.0 -1.19 -3.76 1 -4.98 3.16 3.17 0.00450
# … with 3 more variables: .sigma <dbl>, .cooksd <dbl>, index <int>, and
# abbreviated variable names ¹Weight[,1], ²Height[,1],
# ³Intervention_months_factor, ⁴.std.resid
Code
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
n <- nrow (data_scale)
cooksd = cooks.distance (PICA_fit_postF)
plot (cooksd, main= "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# influence plot
car:: influencePlot (PICA_fit_postF,col= "red" ,id.n= 3 )
StudRes Hat CookD
1968 3.159646 0.005396830 0.0740710103
2897 -0.420213 0.060323738 0.0007615856
3889 2.916690 0.012066170 0.0782557928
4404 -0.259491 0.090758385 0.0004468314
7354 3.262409 0.004500247 0.0829501987
Code
car:: influenceIndexPlot (PICA_fit_postF, id.n= 3 )
Code
# check without some outliers
data_scale2 <- data_scale[c (- 1968 , - 3889 , - 7354 ), ]
PICA_fit_postF_upd1 <- update (PICA_fit_postF, data = data_scale2, subset= data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 0 )
car:: compareCoefs (PICA_fit_postF,PICA_fit_postF_upd1) # no improvement
Calls:
1: glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor,
family = binomial, data = data_scale, subset = data_scale$Gender ==
"Female" & data_scale$PostMeno == 1)
2: glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor,
family = binomial, data = data_scale2, subset = data_scale2$Gender ==
"Female" & data_scale2$PostMeno == 0)
Model 1 Model 2
(Intercept) -4.099 -4.838
SE 0.539 0.449
Age -0.0121 -0.0762
SE 0.0308 0.0226
Weight 0.0250 0.0261
SE 0.0164 0.0109
Height -0.0201 -0.0249
SE 0.0333 0.0232
Intervention_months_factor1 -0.6280 0.0552
SE 0.7799 0.3830
Intervention_months_factor2 0.509 -0.293
SE 0.499 0.435
Intervention_months_factor3 0.483 0.248
SE 0.785 0.438
Intervention_months_factor4 0.655 -0.224
SE 0.600 0.463
Code
Call:
glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor,
family = binomial, data = data_scale, subset = data_scale$Gender ==
"Female" & data_scale$PostMeno == 1)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.4133 -0.2063 -0.1734 -0.1483 3.1595
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -4.09907 0.53920 -7.602 2.91e-14 ***
Age -0.01214 0.03081 -0.394 0.694
Weight 0.02500 0.01644 1.521 0.128
Height -0.02009 0.03328 -0.604 0.546
Intervention_months_factor1 -0.62798 0.77987 -0.805 0.421
Intervention_months_factor2 0.50858 0.49885 1.020 0.308
Intervention_months_factor3 0.48344 0.78492 0.616 0.538
Intervention_months_factor4 0.65540 0.60015 1.092 0.275
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 253.11 on 1464 degrees of freedom
Residual deviance: 247.37 on 1457 degrees of freedom
(608 observations deleted due to missingness)
AIC: 263.37
Number of Fisher Scoring iterations: 7
Code
summary (PICA_fit_postF_upd1)
Call:
glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor,
family = binomial, data = data_scale2, subset = data_scale2$Gender ==
"Female" & data_scale2$PostMeno == 0)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.6176 -0.2450 -0.2033 -0.1570 3.1874
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -4.83824 0.44894 -10.777 < 2e-16 ***
Age -0.07616 0.02259 -3.372 0.000746 ***
Weight 0.02613 0.01088 2.402 0.016300 *
Height -0.02488 0.02323 -1.071 0.284121
Intervention_months_factor1 0.05523 0.38299 0.144 0.885335
Intervention_months_factor2 -0.29262 0.43547 -0.672 0.501609
Intervention_months_factor3 0.24805 0.43821 0.566 0.571356
Intervention_months_factor4 -0.22381 0.46300 -0.483 0.628810
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 514.02 on 2344 degrees of freedom
Residual deviance: 496.47 on 2337 degrees of freedom
(685 observations deleted due to missingness)
AIC: 512.47
Number of Fisher Scoring iterations: 7
Code
#multicollinearity
car:: vif (PICA_fit_postF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.040166 1 1.019885
Weight 1.158062 1 1.076133
Height 1.185532 1 1.088821
Intervention_months_factor 1.011547 4 1.001436
Males:
Code
plot (PICA_fitM, which = 4 , id.n = 3 )
Code
#Inspect the largest:
model.data <- augment (PICA_fitM) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd)
# A tibble: 3 × 13
.rownames PICA Age[,1] Weight…¹ Heigh…² Inter…³ .fitted .resid .std.…⁴ .hat
<chr> <fct> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 897 Yes -9.02 66.8 23.2 2 -2.54 2.29 2.31 0.0172
2 6725 Yes -11.0 51.8 -2.76 4 -2.46 2.25 2.28 0.0201
3 8024 Yes 24.0 8.81 -24.8 0 -3.00 2.47 2.48 0.0115
# … with 3 more variables: .sigma <dbl>, .cooksd <dbl>, index <int>, and
# abbreviated variable names ¹Weight[,1], ²Height[,1],
# ³Intervention_months_factor, ⁴.std.resid
Code
#Plot standardized residuals:
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = PICA), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 ) # 4 influential data points
# A tibble: 0 × 13
# … with 13 variables: .rownames <chr>, PICA <fct>, Age <dbl[,1]>,
# Weight <dbl[,1]>, Height <dbl[,1]>, Intervention_months_factor <fct>,
# .fitted <dbl>, .resid <dbl>, .std.resid <dbl>, .hat <dbl>, .sigma <dbl>,
# .cooksd <dbl>, index <int>
Code
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
n <- nrow (data_scale)
cooksd = cooks.distance (PICA_fitM)
plot (cooksd, main= "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# influence plot
car:: influencePlot (PICA_fitM,col= "red" ,id.n= 3 )
StudRes Hat CookD
1294 -0.3973451 0.068791951 0.0007832811
5248 -0.4809799 0.032709351 0.0005258886
6621 2.9351986 0.002152348 0.0183783288
6725 2.3060317 0.020059249 0.0304778698
7465 2.9609268 0.002124400 0.0195002389
8024 2.5155578 0.011478062 0.0294584111
Code
car:: influenceIndexPlot (PICA_fitM, id.n= 3 )
Code
# check without some outliers
data_scale2 <- data_scale[c (- 6621 , - 1294 , - 7465 ), ]
PICA_fitM_upd1 <- update (PICA_fitM, data = data_scale2, subset= data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 0 )
car:: compareCoefs (PICA_fitM,PICA_fitM_upd1) # no improvement
Calls:
1: glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor,
family = binomial, data = data_scale, subset = data_scale$Gender == "Male")
2: glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor,
family = binomial, data = data_scale2, subset = data_scale2$Gender ==
"Female" & data_scale2$PostMeno == 0)
Model 1 Model 2
(Intercept) -3.590 -4.838
SE 0.183 0.449
Age -0.00332 -0.07616
SE 0.00721 0.02259
Weight 0.02196 0.02613
SE 0.00816 0.01088
Height -0.0193 -0.0249
SE 0.0157 0.0232
Intervention_months_factor1 -0.3809 0.0552
SE 0.3526 0.3830
Intervention_months_factor2 -0.00243 -0.29262
SE 0.28825 0.43547
Intervention_months_factor3 0.102 0.248
SE 0.389 0.438
Intervention_months_factor4 -0.0948 -0.2238
SE 0.3190 0.4630
Code
Call:
glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor,
family = binomial, data = data_scale, subset = data_scale$Gender ==
"Male")
Deviance Residuals:
Min 1Q Median 3Q Max
-0.4767 -0.2463 -0.2259 -0.2076 2.9345
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -3.589998 0.182792 -19.640 < 2e-16 ***
Age -0.003321 0.007205 -0.461 0.64490
Weight 0.021961 0.008161 2.691 0.00712 **
Height -0.019276 0.015696 -1.228 0.21943
Intervention_months_factor1 -0.380917 0.352632 -1.080 0.28005
Intervention_months_factor2 -0.002432 0.288246 -0.008 0.99327
Intervention_months_factor3 0.101603 0.388965 0.261 0.79393
Intervention_months_factor4 -0.094791 0.319002 -0.297 0.76635
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 875.12 on 3544 degrees of freedom
Residual deviance: 866.95 on 3537 degrees of freedom
(671 observations deleted due to missingness)
AIC: 882.95
Number of Fisher Scoring iterations: 6
Code
Call:
glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor,
family = binomial, data = data_scale2, subset = data_scale2$Gender ==
"Female" & data_scale2$PostMeno == 0)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.6176 -0.2450 -0.2033 -0.1570 3.1874
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -4.83824 0.44894 -10.777 < 2e-16 ***
Age -0.07616 0.02259 -3.372 0.000746 ***
Weight 0.02613 0.01088 2.402 0.016300 *
Height -0.02488 0.02323 -1.071 0.284121
Intervention_months_factor1 0.05523 0.38299 0.144 0.885335
Intervention_months_factor2 -0.29262 0.43547 -0.672 0.501609
Intervention_months_factor3 0.24805 0.43821 0.566 0.571356
Intervention_months_factor4 -0.22381 0.46300 -0.483 0.628810
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 514.02 on 2344 degrees of freedom
Residual deviance: 496.47 on 2337 degrees of freedom
(685 observations deleted due to missingness)
AIC: 512.47
Number of Fisher Scoring iterations: 7
Code
#multicollinearity
car:: vif (PICA_fitM) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.064441 1 1.031718
Weight 1.215704 1 1.102590
Height 1.201809 1 1.096271
Intervention_months_factor 1.011403 4 1.001418
Restless legs syndrome
Restless legs syndrome (RLS) diagnose was based of Burchell & Alleen, 2008. Validation of the self-completed Cambridge-Hopkins questionnaire (CH-RLSq) for ascertainment of restless legs syndrome (RLS) in a population survey.
Predict by ferritin
Code
# Compute the analysis of variance
data_scale$ RLS <- as.numeric (data_scale$ RLS)
means <- round (aggregate (Ferritin ~ RLS, data_scale, mean), 1 )
res.aov <- aov (RLS ~ Fergroup, data = data_scale)
summary (res.aov)
Df Sum Sq Mean Sq F value Pr(>F)
Fergroup 2 0.2 0.09586 1.502 0.223
Residuals 6508 415.3 0.06381
1644 observations deleted due to missingness
Code
#fit model
fit1 <- lm (RLS ~ 1 + Fergroup, data = data_scale)
summary (fit1)
Call:
lm(formula = RLS ~ 1 + Fergroup, data = data_scale)
Residuals:
Min 1Q Median 3Q Max
-0.07363 -0.07363 -0.07033 -0.05959 0.94041
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.059595 0.006167 171.831 <2e-16 ***
FergroupFerritin 15-30 0.014038 0.008519 1.648 0.0994 .
FergroupFerritin > 30 0.010733 0.007707 1.393 0.1638
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.2526 on 6508 degrees of freedom
(1644 observations deleted due to missingness)
Multiple R-squared: 0.0004615, Adjusted R-squared: 0.0001543
F-statistic: 1.502 on 2 and 6508 DF, p-value: 0.2227
Code
data_scale <- data_scale %>% mutate (RLS = factor (case_when (RLS == 2 ~ 1 , RLS == 1 ~ 0 )) %>% fct_recode ("Yes" = "1" , "No" = "0" ))
# Barplot
data_scale_per2 <- data_scale %>%
group_by (Fergroup) %>%
count (RLS) %>%
mutate (
Percentage = round (proportions (n) * 100 , 1 ),
res = str_c (n, "(" , Percentage, ")%" )
)
ggplot (subset (data_scale_per2, ! is.na (RLS)), aes (Fergroup, Percentage, fill = RLS)) +
geom_col (position = "dodge" ) +
geom_text (aes (label = res), vjust = - 0.2 )+
theme_bw ()
Code
ggplot (data_scale , mapping = aes (x = Fergroup, fill = RLS)) +
geom_bar (position = "dodge" ) +
theme_bw ()
Code
# t-test + boxplot
res <- res <- t.test (Ferritin ~ RLS, data = data_scale, var.equal = TRUE )
res
Two Sample t-test
data: Ferritin by RLS
t = -1.2289, df = 6509, p-value = 0.2192
alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
95 percent confidence interval:
-7.624125 1.748641
sample estimates:
mean in group No mean in group Yes
40.47954 43.41729
Code
ggplot (subset (data_scale, ! is.na (RLS)), aes (x= RLS, y= Ferritin)) +
geom_boxplot () +
theme_bw () +
geom_text (x = 2.3 , y = 700 , label= paste ('Means:' , " " , means$ Ferritin[1 ], " " , means$ Ferritin[2 ], " \n " , 't(' ,res$ parameter, '), p = ' , round (res$ p.value, 3 ), sep = '' )) +
labs (x= "RLS" , y= "Ferritin" )
Models
Code
RLS_fitM <- glm (formula = RLS ~ Age + Weight + Height + Intervention_months_factor, data = data_scale, subset = data_scale$ Gender == "Male" ,family = binomial)
RLS_fit_preF <- glm (formula = RLS ~ Age + Weight + Height + Intervention_months_factor, data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 0 ,family = binomial)
RLS_fit_postF <- glm (formula = RLS ~ Age + Weight + Height + Intervention_months_factor, data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 1 ,family = binomial)
pl <- c (
` (Intercept) ` = "Intercept" ,
Intervention_months_factor0 = "0-5 months" ,
Intervention_months_factor1 = "6-11 months" ,
Intervention_months_factor2 = "12-17 months" ,
Intervention_months_factor3 = "18-23 months" ,
Intervention_months_factor4 = "24+ months"
)
tab_model (RLS_fit_preF, RLS_fit_postF, RLS_fitM, pred.labels = pl, dv.labels = c ("Pre-menopausal women" , "Post-menopausal women" , "Men" ), show.reflvl = T, title = "Restless Legs Syndrome" )
Restless Legs Syndrome
Pre-menopausal women
Post-menopausal women
Men
Predictors
Odds Ratios
CI
p
Odds Ratios
CI
p
Odds Ratios
CI
p
Intercept
0.07
0.04 – 0.11
<0.001
0.07
0.04 – 0.12
<0.001
0.04
0.03 – 0.06
<0.001
Age
1.03
1.00 – 1.05
0.024
1.02
0.99 – 1.04
0.229
1.01
1.00 – 1.02
0.175
Weight
1.00
0.99 – 1.02
0.925
1.01
0.99 – 1.02
0.386
1.01
1.00 – 1.02
0.120
Height
1.01
0.98 – 1.03
0.665
0.99
0.96 – 1.02
0.439
1.00
0.98 – 1.03
0.778
6-11 months
1.69
1.01 – 2.77
0.040
2.15
1.38 – 3.35
0.001
1.14
0.71 – 1.77
0.572
12-17 months
1.05
0.58 – 1.84
0.856
0.57
0.29 – 1.03
0.074
0.80
0.48 – 1.28
0.372
18-23 months
2.75
1.62 – 4.59
<0.001
1.08
0.48 – 2.17
0.843
1.93
1.16 – 3.09
0.008
24+ months
2.75
1.73 – 4.37
<0.001
2.10
1.24 – 3.49
0.005
1.63
1.08 – 2.43
0.018
Observations
2187
1322
3359
R2 Tjur
0.016
0.023
0.006
Model assumptions
Premenopausal females:
Code
plot (RLS_fit_preF, which = 4 , id.n = 3 )
Code
#Inspect the largest:
model.data <- augment (RLS_fit_preF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) #residuals not above 3, no further attention needed
# A tibble: 3 × 13
.rownames RLS Age[,1] Weigh…¹ Heigh…² Inter…³ .fitted .resid .std.…⁴ .hat
<chr> <fct> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 6127 Yes -15.0 31.8 -0.760 1 -2.51 2.28 2.29 0.00847
2 6332 Yes -9.02 11.8 -16.8 2 -2.94 2.45 2.45 0.00575
3 7722 Yes -21.0 36.8 -1.76 3 -2.18 2.14 2.16 0.0155
# … with 3 more variables: .sigma <dbl>, .cooksd <dbl>, index <int>, and
# abbreviated variable names ¹Weight[,1], ²Height[,1],
# ³Intervention_months_factor, ⁴.std.resid
Code
#Plot standardized residuals:
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = RLS), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 )
# A tibble: 0 × 13
# … with 13 variables: .rownames <chr>, RLS <fct>, Age <dbl[,1]>,
# Weight <dbl[,1]>, Height <dbl[,1]>, Intervention_months_factor <fct>,
# .fitted <dbl>, .resid <dbl>, .std.resid <dbl>, .hat <dbl>, .sigma <dbl>,
# .cooksd <dbl>, index <int>
Code
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
n <- nrow (data_scale)
cooksd = cooks.distance (RLS_fit_preF)
plot (cooksd, main= "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# influence plot
car:: influencePlot (RLS_fit_preF,col= "red" ,id.n= 3 )
StudRes Hat CookD
821 2.6013329 0.001601046 0.0055860203
3205 2.6277078 0.002192976 0.0081407297
3945 -0.5712971 0.024406454 0.0005599473
4031 1.9170009 0.019842383 0.0128369434
6332 2.4701972 0.005746905 0.0138111756
7722 2.1712067 0.015547237 0.0177414365
Code
car:: influenceIndexPlot (RLS_fit_preF, id.n= 3 )
Code
# check without some outliers
data_scale2 <- data_scale[c (- 4453 , - 3788 , - 7722 ), ]
RLS_fit_preF_upd1 <- update (RLS_fit_preF, data = data_scale2, subset= data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 0 )
car:: compareCoefs (RLS_fit_preF,RLS_fit_preF_upd1) # no improvement
Calls:
1: glm(formula = RLS ~ Age + Weight + Height + Intervention_months_factor,
family = binomial, data = data_scale, subset = data_scale$Gender ==
"Female" & data_scale$PostMeno == 0)
2: glm(formula = RLS ~ Age + Weight + Height + Intervention_months_factor,
family = binomial, data = data_scale2, subset = data_scale2$Gender ==
"Female" & data_scale2$PostMeno == 0)
Model 1 Model 2
(Intercept) -2.674 -2.662
SE 0.222 0.222
Age 0.0253 0.0274
SE 0.0112 0.0112
Weight 0.000723 -0.001605
SE 0.007668 0.007825
Height 0.00622 0.00659
SE 0.01437 0.01447
Intervention_months_factor1 0.522 0.522
SE 0.255 0.255
Intervention_months_factor2 0.0530 0.0517
SE 0.2914 0.2914
Intervention_months_factor3 1.010 0.971
SE 0.264 0.267
Intervention_months_factor4 1.012 1.021
SE 0.236 0.236
Code
Call:
glm(formula = RLS ~ Age + Weight + Height + Intervention_months_factor,
family = binomial, data = data_scale, subset = data_scale$Gender ==
"Female" & data_scale$PostMeno == 0)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.6167 -0.4318 -0.3350 -0.2855 2.6153
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -2.6735479 0.2224715 -12.017 < 2e-16 ***
Age 0.0253457 0.0111974 2.264 0.023602 *
Weight 0.0007225 0.0076680 0.094 0.924929
Height 0.0062174 0.0143652 0.433 0.665151
Intervention_months_factor1 0.5222805 0.2548090 2.050 0.040394 *
Intervention_months_factor2 0.0529890 0.2913784 0.182 0.855695
Intervention_months_factor3 1.0104212 0.2643839 3.822 0.000132 ***
Intervention_months_factor4 1.0120963 0.2355889 4.296 1.74e-05 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1098.6 on 2186 degrees of freedom
Residual deviance: 1064.6 on 2179 degrees of freedom
(843 observations deleted due to missingness)
AIC: 1080.6
Number of Fisher Scoring iterations: 5
Code
summary (RLS_fit_preF_upd1)
Call:
glm(formula = RLS ~ Age + Weight + Height + Intervention_months_factor,
family = binomial, data = data_scale2, subset = data_scale2$Gender ==
"Female" & data_scale2$PostMeno == 0)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.6151 -0.4266 -0.3349 -0.2840 2.6160
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -2.662328 0.222254 -11.979 < 2e-16 ***
Age 0.027447 0.011234 2.443 0.014560 *
Weight -0.001605 0.007825 -0.205 0.837503
Height 0.006588 0.014468 0.455 0.648873
Intervention_months_factor1 0.522305 0.254836 2.050 0.040406 *
Intervention_months_factor2 0.051651 0.291399 0.177 0.859310
Intervention_months_factor3 0.970704 0.267317 3.631 0.000282 ***
Intervention_months_factor4 1.020692 0.235664 4.331 1.48e-05 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1092.9 on 2183 degrees of freedom
Residual deviance: 1059.3 on 2176 degrees of freedom
(843 observations deleted due to missingness)
AIC: 1075.3
Number of Fisher Scoring iterations: 5
Code
#multicollinearity
car:: vif (RLS_fit_preF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.069661 1 1.034244
Weight 1.229743 1 1.108938
Height 1.158989 1 1.076564
Intervention_months_factor 1.004111 4 1.000513
Postmenopausal females:
Code
plot (RLS_fit_postF, which = 4 , id.n = 3 )
Code
#Inspect the largest:
model.data <- augment (RLS_fit_postF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd)
# A tibble: 3 × 13
.rownames RLS Age[,1] Weigh…¹ Heigh…² Inter…³ .fitted .resid .std.…⁴ .hat
<chr> <fct> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 1289 Yes 6.98 31.8 3.24 2 -2.86 2.42 2.43 0.00857
2 3919 Yes 5.98 9.81 9.24 3 -2.46 2.26 2.27 0.0141
3 4404 Yes 9.98 -16.2 -59.8 0 -1.85 2.00 2.08 0.0717
# … with 3 more variables: .sigma <dbl>, .cooksd <dbl>, index <int>, and
# abbreviated variable names ¹Weight[,1], ²Height[,1],
# ³Intervention_months_factor, ⁴.std.resid
Code
#Plot standardized residuals:
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = RLS), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 )
# A tibble: 0 × 13
# … with 13 variables: .rownames <chr>, RLS <fct>, Age <dbl[,1]>,
# Weight <dbl[,1]>, Height <dbl[,1]>, Intervention_months_factor <fct>,
# .fitted <dbl>, .resid <dbl>, .std.resid <dbl>, .hat <dbl>, .sigma <dbl>,
# .cooksd <dbl>, index <int>
Code
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
n <- nrow (data_scale)
cooksd = cooks.distance (RLS_fit_postF)
plot (cooksd, main= "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# influence plot
car:: influencePlot (RLS_fit_postF,col= "red" ,id.n= 3 )
StudRes Hat CookD
1032 -0.6930935 0.046495196 0.001683483
1268 2.4950547 0.004650945 0.011990113
1290 2.4993598 0.004037976 0.010576868
3919 2.2923175 0.014064948 0.021209634
4404 2.1192001 0.071749478 0.066417273
Code
car:: influenceIndexPlot (RLS_fit_postF, id.n= 3 )
Code
# check without some outliers
data_scale2 <- data_scale[c (- 3919 , - 4404 ), ]
RLS_fit_postF_upd1 <- update (RLS_fit_postF, data = data_scale2, subset= data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 0 )
car:: compareCoefs (RLS_fit_postF,RLS_fit_postF_upd1) # no improvement
Calls:
1: glm(formula = RLS ~ Age + Weight + Height + Intervention_months_factor,
family = binomial, data = data_scale, subset = data_scale$Gender ==
"Female" & data_scale$PostMeno == 1)
2: glm(formula = RLS ~ Age + Weight + Height + Intervention_months_factor,
family = binomial, data = data_scale2, subset = data_scale2$Gender ==
"Female" & data_scale2$PostMeno == 0)
Model 1 Model 2
(Intercept) -2.596 -2.674
SE 0.253 0.222
Age 0.0161 0.0253
SE 0.0134 0.0112
Weight 0.007167 0.000723
SE 0.008268 0.007668
Height -0.01168 0.00622
SE 0.01507 0.01437
Intervention_months_factor1 0.767 0.522
SE 0.227 0.255
Intervention_months_factor2 -0.570 0.053
SE 0.319 0.291
Intervention_months_factor3 0.0751 1.0104
SE 0.3792 0.2644
Intervention_months_factor4 0.743 1.012
SE 0.263 0.236
Code
#multicollinearity
car:: vif (RLS_fit_postF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.030176 1 1.014976
Weight 1.138964 1 1.067222
Height 1.148782 1 1.071812
Intervention_months_factor 1.019413 4 1.002406
Males:
Code
plot (RLS_fitM, which = 4 , id.n = 3 )
Code
#Inspect the largest:
model.data <- augment (RLS_fitM) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd)
# A tibble: 3 × 13
.rownames RLS Age[,1] Weight…¹ Heigh…² Inter…³ .fitted .resid .std.…⁴ .hat
<chr> <fct> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 791 Yes 6.98 56.8 -1.76 0 -2.50 2.27 2.28 0.0104
2 2907 Yes -15.0 46.8 3.24 4 -2.25 2.17 2.18 0.0103
3 4049 Yes -22.0 56.8 5.24 1 -2.55 2.29 2.31 0.0123
# … with 3 more variables: .sigma <dbl>, .cooksd <dbl>, index <int>, and
# abbreviated variable names ¹Weight[,1], ²Height[,1],
# ³Intervention_months_factor, ⁴.std.resid
Code
#Plot standardized residuals:
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = RLS), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 ) # 0 influential data points
# A tibble: 0 × 13
# … with 13 variables: .rownames <chr>, RLS <fct>, Age <dbl[,1]>,
# Weight <dbl[,1]>, Height <dbl[,1]>, Intervention_months_factor <fct>,
# .fitted <dbl>, .resid <dbl>, .std.resid <dbl>, .hat <dbl>, .sigma <dbl>,
# .cooksd <dbl>, index <int>
Code
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
n <- nrow (data_scale)
cooksd = cooks.distance (RLS_fitM)
plot (cooksd, main= "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# influence plot
car:: influencePlot (RLS_fitM,col= "red" ,id.n= 3 )
StudRes Hat CookD
791 2.2974174 0.010390502 0.0160950483
2756 2.6390085 0.002755262 0.0104591400
3057 -0.5567949 0.024789488 0.0005383252
4049 2.3267397 0.012279821 0.0201917850
5998 -0.4565452 0.019860153 0.0002806905
6336 2.6525166 0.002059943 0.0081785230
Code
car:: influenceIndexPlot (RLS_fitM, id.n= 3 )
Code
#multicollinearity
car:: vif (RLS_fitM) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.078063 1 1.038298
Weight 1.234575 1 1.111114
Height 1.238252 1 1.112768
Intervention_months_factor 1.009268 4 1.001154
Cognitive functioning
Scoring: https://meetinstrumentenzorg.nl/wp-content/uploads/instrumenten/CFQ-form.pdf
Predict by ferritin
Code
res <- cor.test (data_scale$ CFQ_Total, data_scale$ Ferritin, method = "pearson" , use = "complete.obs" )
ggplot (data_scale, mapping = aes (x = CFQ_Total, y = Ferritin)) +
geom_point () +
theme_bw () +
geom_text (x = 75 , y = 1000 , label= paste ('r(' ,res$ parameter,') = ' , round (res$ estimate,3 ),', p = ' , round (res$ p.value, 3 ), sep = '' )) +
labs (x= "Cognitive functioning, higher is worse" , y= "Ferritin" )
Code
# plot for cutoffs
data_scale <- data_scale %>% mutate (Fergroup = factor (case_when (Ferritin < 15 ~ 1 , Ferritin >= 15 & Ferritin <= 30 ~ 2 , Ferritin > 30 ~ 3 )) %>% fct_recode ("Ferritin < 15" = "1" , "Ferritin 15-30" = "2" , "Ferritin > 30" = "3" ))
ggplot (data_scale, aes (x= Fergroup, y= CFQ_Total)) +
geom_boxplot () +
theme_bw () +
labs (x= "Ferritin cut-off groups" , y= "Cognitive functioning, higher is worse" )
Models
Code
CFQ_fitM <- lm (formula = CFQ_Total ~ Age + Weight + Height + Intervention_months_factor, data = data_scale, subset = data_scale$ Gender == "Male" )
CFQ_fit_preF <- lm (formula = CFQ_Total ~ Age + Weight + Height + Intervention_months_factor, data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 0 )
CFQ_fit_postF <- lm (formula = CFQ_Total ~ Age + Weight + Height + Intervention_months_factor, data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 1 )
pl <- c (
` (Intercept) ` = "Intercept" ,
Intervention_months_factor0 = "0-5 months" ,
Intervention_months_factor1 = "6-11 months" ,
Intervention_months_factor2 = "12-17 months" ,
Intervention_months_factor3 = "18-23 months" ,
Intervention_months_factor4 = "24+ months"
)
tab_model (CFQ_fit_preF, CFQ_fit_postF, CFQ_fitM, pred.labels = pl, dv.labels = c ("Pre-menopausal women" , "Post-menopausal women" , "Men" ), show.reflvl = T, title = "Cognitive functioning" )
Cognitive functioning
Pre-menopausal women
Post-menopausal women
Men
Predictors
Estimates
CI
p
Estimates
CI
p
Estimates
CI
p
Intercept
27.27
25.92 – 28.62
<0.001
28.03
26.52 – 29.54
<0.001
25.51
24.85 – 26.17
<0.001
Age
-0.25
-0.32 – -0.18
<0.001
-0.16
-0.25 – -0.08
<0.001
-0.14
-0.17 – -0.11
<0.001
Weight
0.01
-0.04 – 0.06
0.754
0.02
-0.04 – 0.07
0.570
-0.01
-0.04 – 0.02
0.569
Height
-0.09
-0.17 – 0.00
0.062
0.01
-0.09 – 0.11
0.801
-0.08
-0.14 – -0.02
0.007
6-11 months
0.05
-1.48 – 1.58
0.951
0.70
-0.94 – 2.34
0.402
0.25
-0.87 – 1.37
0.657
12-17 months
-0.97
-2.49 – 0.54
0.209
1.05
-0.49 – 2.59
0.181
0.77
-0.30 – 1.83
0.159
18-23 months
-0.68
-2.58 – 1.23
0.484
2.11
-0.30 – 4.53
0.087
-0.66
-2.15 – 0.82
0.381
24+ months
1.57
-0.07 – 3.21
0.060
1.04
-0.96 – 3.03
0.307
0.15
-0.99 – 1.29
0.795
Observations
2342
1464
3534
R2 / R2 adjusted
0.025 / 0.022
0.013 / 0.009
0.036 / 0.034
Assumptions
Premenopausal females:
Code
### LINEARITY (Linearity and constant variance assumptions)
#Fitted versus Residuals Plot: Linearity ok, constant variance meh (heteroscedasticity)
plot (CFQ_fit_preF, which = 1 )
Code
### HOMOGEINITY OF VARIANCE
# Display scale-location plot (also known as spread-location plot)
plot (CFQ_fit_preF, which = 3 ) # Linearity good, don't have constant variance
Code
# formal test in the form of Breusch-Pagan
bptest (CFQ_fit_preF) # small p-value, we reject the null of homoscedasticity. The constant variance assumption is violated.
studentized Breusch-Pagan test
data: CFQ_fit_preF
BP = 36.591, df = 7, p-value = 5.607e-06
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot (CFQ_fit_preF, 2 ) #suspect
Code
# formal test in the form of Shapiro-Wilk
shapiro.test (resid (CFQ_fit_preF)) #fail to reject for the data sampled from normal
Shapiro-Wilk normality test
data: resid(CFQ_fit_preF)
W = 0.98238, p-value < 2.2e-16
Code
### OUTLIERS
#plot 3largest
plot (CFQ_fit_preF, which = 4 , id.n = 10 )
Code
# inspect the largest
model.data <- augment (CFQ_fit_preF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 13
.rowna…¹ CFQ_T…² Age[,1] Weigh…³ Heigh…⁴ Inter…⁵ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 275 60 -6.02 49.8 5.24 2 27.7 32.3 0.0122 13.1
2 4065 73 -17.0 35.8 -10.8 1 32.7 40.3 0.0111 13.1
3 7774 82 -22.0 -20.2 -6.76 3 32.5 49.5 0.00522 13.1
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹.rownames, ²CFQ_Total, ³Weight[,1],
# ⁴Height[,1], ⁵Intervention_months_factor
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = CFQ_Total), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 ) # some influential data points
# A tibble: 19 × 13
.rown…¹ CFQ_T…² Age[,1] Weigh…³ Heigh…⁴ Inter…⁵ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 671 78 -18.0 -17.2 -12.8 0 32.7 45.3 0.00170 13.1
2 1146 76 -17.0 -23.2 -4.76 1 31.8 44.2 0.00338 13.1
3 1366 87 -15.0 -11.2 -9.76 0 31.7 55.3 0.00120 13.1
4 1414 77 -23.0 -8.19 0.240 2 31.9 45.1 0.00363 13.1
5 1776 70 -16.0 8.81 3.24 2 30.1 39.9 0.00389 13.1
6 3168 72 -14.0 19.8 -0.760 2 30.0 42.0 0.00487 13.1
7 3514 86 -17.0 -15.2 -11.8 4 33.9 52.1 0.00359 13.1
8 3611 92 -18.0 -13.2 -8.76 4 33.9 58.1 0.00334 13.1
9 3662 83 -17.0 -23.2 -15.8 4 34.2 48.8 0.00450 13.1
10 3812 82 -17.0 14.8 -8.76 0 32.3 49.7 0.00354 13.1
11 4065 73 -17.0 35.8 -10.8 1 32.7 40.3 0.0111 13.1
12 4595 78 -16.0 6.81 1.24 0 31.2 46.8 0.00202 13.1
13 5244 92 -21.0 -20.2 -8.76 2 32.1 59.9 0.00325 13.1
14 5572 72 -1.02 -3.19 -1.76 4 29.2 42.8 0.00421 13.1
15 6447 75 -14.0 -8.19 6.24 0 30.1 44.9 0.00278 13.1
16 6780 74 -18.0 6.81 -10.8 4 34.3 39.7 0.00482 13.1
17 6859 72 -16.0 -18.2 -8.76 3 31.2 40.8 0.00481 13.1
18 7737 73 -19.0 -6.19 -2.76 3 31.5 41.5 0.00475 13.1
19 7774 82 -22.0 -20.2 -6.76 3 32.5 49.5 0.00522 13.1
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹.rownames, ²CFQ_Total, ³Weight[,1],
# ⁴Height[,1], ⁵Intervention_months_factor
Code
#Cook's distance
cooksd = cooks.distance (CFQ_fit_preF)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations
#exceed this threshold
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# (log) transform to pass normality.
data_scale2 <- data_scale
data_scale2$ CFQ_Total <- log (data_scale2$ CFQ_Total)
table (data_scale2$ CFQ_Total)
-Inf 0 0.693147180559945 1.09861228866811
29 20 29 34
1.38629436111989 1.6094379124341 1.79175946922805 1.94591014905531
31 66 53 73
2.07944154167984 2.19722457733622 2.30258509299405 2.39789527279837
86 91 108 121
2.484906649788 2.56494935746154 2.63905732961526 2.70805020110221
147 143 150 180
2.77258872223978 2.83321334405622 2.89037175789616 2.94443897916644
193 199 229 246
2.99573227355399 3.04452243772342 3.09104245335832 3.13549421592915
230 266 225 290
3.17805383034795 3.2188758248682 3.25809653802148 3.29583686600433
262 293 273 269
3.3322045101752 3.36729582998647 3.40119738166216 3.43398720448515
265 279 252 236
3.46573590279973 3.49650756146648 3.52636052461616 3.55534806148941
190 218 205 175
3.58351893845611 3.61091791264422 3.63758615972639 3.66356164612965
176 162 130 111
3.68887945411394 3.71357206670431 3.73766961828337 3.76120011569356
130 113 84 105
3.78418963391826 3.80666248977032 3.8286413964891 3.85014760171006
60 76 62 59
3.87120101090789 3.89182029811063 3.91202300542815 3.93182563272433
50 44 45 35
3.95124371858143 3.97029191355212 3.98898404656427 4.00733318523247
25 36 21 18
4.02535169073515 4.04305126783455 4.06044301054642 4.07753744390572
19 17 19 18
4.0943445622221 4.11087386417331 4.12713438504509 4.14313472639153
15 17 8 9
4.15888308335967 4.17438726989564 4.18965474202643 4.20469261939097
5 3 9 7
4.21950770517611 4.23410650459726 4.24849524204936 4.26267987704132
5 2 3 4
4.27666611901606 4.29045944114839 4.30406509320417 4.31748811353631
4 2 1 1
4.33073334028633 4.34380542185368 4.35670882668959 4.36944785246702
1 2 2 1
4.39444915467244 4.40671924726425 4.4188406077966 4.45434729625351
1 2 1 1
4.46590811865458 4.52178857704904
1 2
Code
data_scale2[data_scale2 <= - Inf ] <- NA # Replace -Inf with NA
table (data_scale2$ CFQ_Total)
0 0.693147180559945 1.09861228866811 1.38629436111989
20 29 34 31
1.6094379124341 1.79175946922805 1.94591014905531 2.07944154167984
66 53 73 86
2.19722457733622 2.30258509299405 2.39789527279837 2.484906649788
91 108 121 147
2.56494935746154 2.63905732961526 2.70805020110221 2.77258872223978
143 150 180 193
2.83321334405622 2.89037175789616 2.94443897916644 2.99573227355399
199 229 246 230
3.04452243772342 3.09104245335832 3.13549421592915 3.17805383034795
266 225 290 262
3.2188758248682 3.25809653802148 3.29583686600433 3.3322045101752
293 273 269 265
3.36729582998647 3.40119738166216 3.43398720448515 3.46573590279973
279 252 236 190
3.49650756146648 3.52636052461616 3.55534806148941 3.58351893845611
218 205 175 176
3.61091791264422 3.63758615972639 3.66356164612965 3.68887945411394
162 130 111 130
3.71357206670431 3.73766961828337 3.76120011569356 3.78418963391826
113 84 105 60
3.80666248977032 3.8286413964891 3.85014760171006 3.87120101090789
76 62 59 50
3.89182029811063 3.91202300542815 3.93182563272433 3.95124371858143
44 45 35 25
3.97029191355212 3.98898404656427 4.00733318523247 4.02535169073515
36 21 18 19
4.04305126783455 4.06044301054642 4.07753744390572 4.0943445622221
17 19 18 15
4.11087386417331 4.12713438504509 4.14313472639153 4.15888308335967
17 8 9 5
4.17438726989564 4.18965474202643 4.20469261939097 4.21950770517611
3 9 7 5
4.23410650459726 4.24849524204936 4.26267987704132 4.27666611901606
2 3 4 4
4.29045944114839 4.30406509320417 4.31748811353631 4.33073334028633
2 1 1 1
4.34380542185368 4.35670882668959 4.36944785246702 4.39444915467244
2 2 1 1
4.40671924726425 4.4188406077966 4.45434729625351 4.46590811865458
2 1 1 1
4.52178857704904
2
Code
CFQFfactor_fix <- lm (CFQ_Total~ Age + Weight + Height + as.factor (Intervention_months_factor),
data = data_scale2,
subset = data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 0 )
qqnorm (resid (CFQFfactor_fix), col = "grey" )
qqline (resid (CFQFfactor_fix), col = "dodgerblue" , lwd = 2 )
Code
shapiro.test (resid (CFQFfactor_fix))
Shapiro-Wilk normality test
data: resid(CFQFfactor_fix)
W = 0.9088, p-value < 2.2e-16
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see below.
#multicollinearity
car:: vif (CFQ_fit_preF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.065997 1 1.032471
Weight 1.201466 1 1.096114
Height 1.134895 1 1.065314
Intervention_months_factor 1.003608 4 1.000450
Postmenopausal females:
Code
#### FEMALE post
### LINEARITY (Linearity and constant variance assumptions)
#Fitted versus Residuals Plot: Linearity good, constant variance looks good (heteroscedasticity)
plot (CFQ_fit_postF, which = 1 )
Code
### HOMOGEINITY OF VARIANCE
# Display scale-location plot, also known as the spread-location plot
plot (CFQ_fit_postF, which = 3 ) # Linearity good, constant variance
Code
# formal test in the form of Breusch-Pagan
bptest (CFQ_fit_postF) # we fail to reject the null of homoscedasticity. The constant variance assumption is not violated.
studentized Breusch-Pagan test
data: CFQ_fit_postF
BP = 12.905, df = 7, p-value = 0.07445
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot (CFQ_fit_postF, 2 ) #suspect
Code
# formal test in the form of Shapiro-Wilk
shapiro.test (resid (CFQ_fit_postF)) #fail to reject for the data sampled from normal
Shapiro-Wilk normality test
data: resid(CFQ_fit_postF)
W = 0.99081, p-value = 6.045e-08
Code
### OUTLIERS
#plot 3largest
plot (CFQ_fit_postF, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (CFQ_fit_postF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 13
.rowna…¹ CFQ_T…² Age[,1] Weigh…³ Heigh…⁴ Inter…⁵ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 3699 2 3.98 10.8 -3.76 3 29.6 -27.6 0.0139 11.2
2 3940 0 6.98 -19.2 -22.8 3 28.4 -28.4 0.0160 11.2
3 4433 79 3.98 -8.19 -12.8 4 28.1 50.9 0.00927 11.1
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹.rownames, ²CFQ_Total, ³Weight[,1],
# ⁴Height[,1], ⁵Intervention_months_factor
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = CFQ_Total), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 ) # some influential data points
# A tibble: 4 × 13
.rowna…¹ CFQ_T…² Age[,1] Weigh…³ Heigh…⁴ Inter…⁵ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 695 72 15.0 11.8 -6.76 0 25.7 46.3 0.00331 11.1
2 1869 61 11.0 7.81 -7.76 2 27.3 33.7 0.00459 11.2
3 4433 79 3.98 -8.19 -12.8 4 28.1 50.9 0.00927 11.1
4 4695 60 16.0 0.811 -4.76 2 26.5 33.5 0.00384 11.2
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹.rownames, ²CFQ_Total, ³Weight[,1],
# ⁴Height[,1], ⁵Intervention_months_factor
Code
#Cook's distance
cooksd = cooks.distance (CFQ_fit_postF)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations
#exceed this threshold
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# log transform to pass normality.
CFQ_postF_factor_fix <- lm (CFQ_Total~ Age + Weight + Height + as.factor (Intervention_months_factor),
data = data_scale2,
subset = data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 1 )
qqnorm (resid (CFQ_postF_factor_fix), col = "grey" )
qqline (resid (CFQ_postF_factor_fix), col = "dodgerblue" , lwd = 2 )
Code
shapiro.test (resid (CFQ_postF_factor_fix))
Shapiro-Wilk normality test
data: resid(CFQ_postF_factor_fix)
W = 0.93056, p-value < 2.2e-16
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see chunk below.
#multicollinearity
car:: vif (CFQ_fit_postF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.035602 1 1.017645
Weight 1.153175 1 1.073860
Height 1.177418 1 1.085089
Intervention_months_factor 1.012313 4 1.001531
Males:
Code
### MALE
### LINEARITY (Lnearity and constant variance assumptions)
#Fitted versus Residuals Plot: Good
plot (CFQ_fitM, which = 1 )
Code
### HOMOGEINITY OF VARIANCE
# Display scale-location plot, also known as the spread-location plot
plot (CFQ_fitM, which = 3 ) # weird pattern below
Code
# formal test in the form of Breusch-Pagan
bptest (CFQ_fitM)# small p-value, so we reject the null of homoscedasticity. The constant variance assumption is violated.
studentized Breusch-Pagan test
data: CFQ_fitM
BP = 63.672, df = 7, p-value = 2.779e-11
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot (CFQ_fitM, 2 ) # suspect
Code
# formal test in the form of Shapiro-Wilk
shapiro.test (resid (CFQ_fitM)) #fail to reject for the data sampled from normal
Shapiro-Wilk normality test
data: resid(CFQ_fitM)
W = 0.98849, p-value = 2.327e-16
Code
### OUTLIERS
#plot 3largest
plot (CFQ_fitM, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (CFQ_fitM) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 13
.rowna…¹ CFQ_T…² Age[,1] Weigh…³ Heigh…⁴ Inter…⁵ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 897 66 -9.02 66.8 23.2 2 25.0 41.0 0.00869 11.3
2 2736 65 -18.0 56.8 10.2 0 26.7 38.3 0.00731 11.3
3 5561 66 29.0 -7.19 -5.76 4 22.1 43.9 0.00388 11.3
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹.rownames, ²CFQ_Total, ³Weight[,1],
# ⁴Height[,1], ⁵Intervention_months_factor
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = CFQ_Total), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 ) # some influential data points
# A tibble: 22 × 13
.rown…¹ CFQ_T…² Age[,1] Weigh…³ Heigh…⁴ Inter…⁵ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 309 59 5.98 11.8 1.24 0 24.5 34.5 9.22e-4 11.3
2 897 66 -9.02 66.8 23.2 2 25.0 41.0 8.69e-3 11.3
3 1214 61 15.0 13.8 5.24 1 23.1 37.9 2.20e-3 11.3
4 1266 59 6.98 1.81 12.2 2 24.3 34.7 2.17e-3 11.3
5 1730 81 4.98 -14.2 -3.76 0 25.3 55.7 1.61e-3 11.3
6 2505 61 11.0 -3.19 -2.76 3 23.6 37.4 4.47e-3 11.3
7 2736 65 -18.0 56.8 10.2 0 26.7 38.3 7.31e-3 11.3
8 2756 67 -24.0 -8.19 -11.8 0 29.9 37.1 3.94e-3 11.3
9 2879 68 -18.0 1.81 8.24 4 27.5 40.5 2.62e-3 11.3
10 3045 61 -18.0 11.8 18.2 4 26.6 34.4 3.14e-3 11.3
# … with 12 more rows, 3 more variables: .cooksd <dbl>, .std.resid <dbl>,
# index <int>, and abbreviated variable names ¹.rownames, ²CFQ_Total,
# ³Weight[,1], ⁴Height[,1], ⁵Intervention_months_factor
Code
#Cook's distance
cooksd = cooks.distance (CFQ_fitM)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations
#exceed this threshold
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# log transform to pass normality.
CFQMfactor_fix <- lm (CFQ_Total~ Age + Weight + Height + as.factor (Intervention_months_factor),
data = data_scale2,
subset = data_scale2$ Gender == "Male" )
qqnorm (resid (CFQMfactor_fix), col = "grey" )
qqline (resid (CFQMfactor_fix), col = "dodgerblue" , lwd = 2 )
Code
shapiro.test (resid (CFQMfactor_fix))
Shapiro-Wilk normality test
data: resid(CFQMfactor_fix)
W = 0.89562, p-value < 2.2e-16
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see chunk below.
#multicollinearity
car:: vif (CFQ_fitM) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.091034 1 1.044526
Weight 1.270075 1 1.126976
Height 1.250205 1 1.118126
Intervention_months_factor 1.007324 4 1.000913
We conclude that the assumptions were violated and thus we use robust models.
Robust models
Code
CFQ_preF_robust <- lmrob (CFQ_Total ~ Age + Weight + Height + as.factor (Intervention_months_factor), data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 0 )
CFQ_postF_robust <- lmrob (CFQ_Total ~ Age + Weight + Height + as.factor (Intervention_months_factor), data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 1 )
CFQ_M_robust <- lmrob (CFQ_Total ~ Age + Weight + Height + as.factor (Intervention_months_factor), data = data_scale, subset = data_scale$ Gender == "Male" )
pl <- c (
` (Intercept) ` = "Intercept" ,
` as.factor(Intervention_months_factor)1 ` = "6-12 months" ,
` as.factor(Intervention_months_factor)2 ` = "12-18 months" ,
` as.factor(Intervention_months_factor)3 ` = "18-24 months" ,
` as.factor(Intervention_months_factor)4 ` = "24+ months"
)
tab_model (CFQ_preF_robust, CFQ_postF_robust, CFQ_M_robust, pred.labels = pl, dv.labels = c ("Pre-menopausal women" , "Post-menopausal women" , "Men" ), show.reflvl = T, title = "Cognitive functioning, higher means more cognitive failure" , digits = 3 )
Cognitive functioning, higher means more cognitive failure
Pre-menopausal women
Post-menopausal women
Men
Predictors
Estimates
CI
p
Estimates
CI
p
Estimates
CI
p
Intercept
27.197
25.990 – 28.404
<0.001
27.331
25.745 – 28.918
<0.001
25.049
24.365 – 25.732
<0.001
Age
-0.210
-0.280 – -0.141
<0.001
-0.148
-0.240 – -0.056
0.002
-0.126
-0.153 – -0.098
<0.001
Weight
-0.005
-0.053 – 0.042
0.826
0.000
-0.053 – 0.053
0.997
-0.021
-0.059 – 0.016
0.269
Height
-0.073
-0.161 – 0.015
0.102
0.014
-0.087 – 0.114
0.791
-0.069
-0.129 – -0.008
0.026
6-12 months
0.144
-1.354 – 1.642
0.851
0.873
-0.767 – 2.512
0.297
0.047
-1.037 – 1.130
0.933
12-18 months
-1.458
-2.942 – 0.025
0.054
1.084
-0.502 – 2.670
0.180
0.807
-0.285 – 1.898
0.148
18-24 months
-0.924
-2.617 – 0.769
0.285
2.588
-0.055 – 5.231
0.055
-0.591
-2.138 – 0.955
0.454
24+ months
0.866
-0.823 – 2.555
0.315
0.859
-1.123 – 2.842
0.395
-0.238
-1.375 – 0.900
0.682
Observations
2342
1464
3534
R2 / R2 adjusted
0.022 / 0.019
0.012 / 0.007
0.033 / 0.031
Physical health
Scored by https://meetinstrumentenzorg.nl/wp-content/uploads/instrumenten/SF-36-RAND-36-form.pdf
Predict by ferritin
Code
res <- cor.test (data_scale$ SF_ph, data_scale$ Ferritin, method = "pearson" , use = "complete.obs" )
ggplot (data_scale, mapping = aes (x = SF_ph, y = Ferritin)) +
geom_point () +
theme_bw () +
geom_text (x = 75 , y = 1000 , label= paste ('r(' ,res$ parameter,') = ' , round (res$ estimate,3 ),', p = ' , round (res$ p.value, 3 ), sep = '' )) +
labs (x= "Physical health (SF-36), higher is better" , y= "Ferritin" )
Code
# plot for cutoffs
data_scale <- data_scale %>% mutate (Fergroup = factor (case_when (Ferritin < 15 ~ 1 , Ferritin >= 15 & Ferritin <= 30 ~ 2 , Ferritin > 30 ~ 3 )) %>% fct_recode ("Ferritin < 15" = "1" , "Ferritin 15-30" = "2" , "Ferritin > 30" = "3" ))
ggplot (data_scale, aes (x= Fergroup, y= SF_ph)) +
geom_boxplot () +
theme_bw () +
labs (x= "Ferritin cut-off groups" , y= "Physical health (SF-36), higher is better" )
Models
Code
SF_ph_preF <- lm (SF_ph ~ Age + Weight + Height + as.factor (Intervention_months_factor), data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 0 )
SF_ph_postF <- lm (SF_ph ~ Age + Weight + Height + as.factor (Intervention_months_factor), data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 1 )
SF_ph_M <- lm (SF_ph ~ Age + Weight + Height + as.factor (Intervention_months_factor), data = data_scale, subset = data_scale$ Gender == "Male" )
pl <- c (
` (Intercept) ` = "Intercept" ,
` as.factor(Intervention_months_factor)1 ` = "6-12 months" ,
` as.factor(Intervention_months_factor)2 ` = "12-18 months" ,
` as.factor(Intervention_months_factor)3 ` = "18-24 months" ,
` as.factor(Intervention_months_factor)4 ` = "24+ months"
)
tab_model (SF_ph_preF, SF_ph_postF, SF_ph_M, pred.labels = pl, dv.labels = c ("Pre-menopausal women" , "Post-menopausal women" , "Men" ), title = "Physical health (Sf-36), higher score indicates better health" , show.reflvl = T, digits= 3 )
Physical health (Sf-36), higher score indicates better health
Pre-menopausal women
Post-menopausal women
Men
Predictors
Estimates
CI
p
Estimates
CI
p
Estimates
CI
p
Intercept
90.543
89.548 – 91.539
<0.001
88.301
86.818 – 89.785
<0.001
91.230
90.733 – 91.728
<0.001
Age
0.079
0.026 – 0.132
0.004
-0.025
-0.110 – 0.060
0.562
-0.031
-0.050 – -0.011
0.002
Weight
-0.154
-0.190 – -0.118
<0.001
-0.190
-0.242 – -0.137
<0.001
-0.113
-0.138 – -0.087
<0.001
Height
0.146
0.080 – 0.212
<0.001
0.048
-0.047 – 0.144
0.321
0.117
0.073 – 0.162
<0.001
6-12 months
-0.203
-1.329 – 0.922
0.723
0.435
-1.167 – 2.038
0.594
0.099
-0.744 – 0.942
0.818
12-18 months
0.329
-0.783 – 1.441
0.562
-1.258
-2.771 – 0.255
0.103
-0.538
-1.344 – 0.269
0.191
18-24 months
-0.479
-1.878 – 0.920
0.502
-1.913
-4.272 – 0.446
0.112
-0.088
-1.210 – 1.033
0.877
24+ months
-0.496
-1.703 – 0.712
0.421
0.985
-0.977 – 2.947
0.325
-0.015
-0.879 – 0.848
0.972
Observations
2398
1468
3582
R2 / R2 adjusted
0.031 / 0.028
0.039 / 0.035
0.031 / 0.029
Assumptions
Premenopausal females:
Code
### LINEARITY (Linearity and constant variance assumptions)
#Fitted versus Residuals Plot: Linearity bad, constant variance meh (heteroscedasticity)
plot (SF_ph_preF, which = 1 )
Code
### HOMOGEINITY OF VARIANCE
# Display scale-location plot (also known as spread-location plot)
plot (SF_ph_preF, which = 3 ) # Linearity bad, constant variance meh
Code
# formal test in the form of Breusch-Pagan
bptest (SF_ph_preF) # small p-value, we reject the null of homoscedasticity. The constant variance assumption is violated.
studentized Breusch-Pagan test
data: SF_ph_preF
BP = 25.899, df = 7, p-value = 0.0005251
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot (SF_ph_preF, 2 ) #suspect
Code
# formal test in the form of Shapiro-Wilk
shapiro.test (resid (SF_ph_preF)) #reject the data sampled from normal
Shapiro-Wilk normality test
data: resid(SF_ph_preF)
W = 0.79775, p-value < 2.2e-16
Code
### OUTLIERS
#plot 3largest
plot (SF_ph_preF, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (SF_ph_preF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 13
.rownames SF_ph Age[,1] Weight…¹ Heigh…² as.fa…³ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 3703 31.9 -2.02 24.8 8.24 3 87.3 -55.4 0.00862 9.71
2 4034 21.9 -22.0 -7.19 -14.8 3 87.3 -65.4 0.00592 9.68
3 4065 13.8 -17.0 35.8 -10.8 1 81.9 -68.1 0.0109 9.67
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹Weight[,1], ²Height[,1],
# ³`as.factor(Intervention_months_factor)`
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = SF_ph), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 4 ) # some influential data points
# A tibble: 20 × 13
.rownames SF_ph Age[,1] Weigh…¹ Heigh…² as.fa…³ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 936 43.1 -8.02 9.81 -16.8 2 86.3 -43.2 0.00561 9.73
2 1407 23.8 -15.0 -17.2 -8.76 2 91.1 -67.3 0.00266 9.68
3 1659 28.1 -12.0 26.8 1.24 0 85.6 -57.5 0.00463 9.70
4 2032 34.4 -14.0 -11.2 -7.76 0 90.0 -55.6 0.00106 9.71
5 2111 41.2 -21.0 -5.19 3.24 0 90.2 -48.9 0.00226 9.72
6 2271 48.7 -21.0 -13.2 -7.76 0 89.8 -41.1 0.00141 9.74
7 2998 50.6 -22.0 -26.2 -11.8 4 90.6 -40.1 0.00443 9.74
8 3703 31.9 -2.02 24.8 8.24 3 87.3 -55.4 0.00862 9.71
9 3877 50.0 -19.0 -0.189 9.24 0 90.4 -40.4 0.00359 9.74
10 4034 21.9 -22.0 -7.19 -14.8 3 87.3 -65.4 0.00592 9.68
11 4065 13.8 -17.0 35.8 -10.8 1 81.9 -68.1 0.0109 9.67
12 4595 42.4 -16.0 6.81 1.24 0 88.4 -46.0 0.00199 9.73
13 5649 38.7 -1.02 -4.19 -18.8 0 88.4 -49.7 0.00427 9.72
14 6172 32.5 -21.0 4.81 -16.8 1 85.5 -53.0 0.00580 9.71
15 6286 52.5 -5.02 -10.2 -0.760 2 91.9 -39.5 0.00331 9.74
16 6414 45.0 -12.0 -20.2 -9.76 0 91.3 -46.3 0.00158 9.73
17 6859 47.5 -16.0 -18.2 -8.76 3 90.3 -42.9 0.00466 9.73
18 7026 42.6 -6.02 -13.2 -8.76 0 90.8 -48.2 0.00169 9.72
19 7246 44.4 -22.0 -13.2 -15.8 1 88.4 -43.9 0.00401 9.73
20 7301 43.2 -19.0 -18.2 -2.76 1 91.3 -48.1 0.00315 9.72
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹Weight[,1], ²Height[,1],
# ³`as.factor(Intervention_months_factor)`
Code
#Cook's distance
cooksd = cooks.distance (SF_ph_preF)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations
#exceed this threshold
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# log transform to pass normality.
data_scale2 <- data_scale
table (data_scale2$ SF_ph)
13.8010204081633 15.0510204081633 15.1020408163265 16.25
1 1 1 1
21.9132653061224 23.1122448979592 23.8010204081633 23.8520408163265
1 1 1 1
25.6122448979592 26.8622448979592 27.4744897959184 28.1122448979592
1 1 1 1
29.3622448979592 30.6122448979592 31.1734693877551 31.9132653061224
1 1 1 2
32.4744897959184 32.6020408163265 33.1632653061225 33.8010204081633
1 1 1 1
34.4132653061224 35.6122448979592 35.6632653061224 37.4234693877551
1 1 1 1
38.7244897959184 39.3622448979592 39.9744897959184 40.0255102040816
3 2 1 1
40.0510204081633 40.6122448979592 40.6632653061224 41.1734693877551
1 2 1 1
41.2244897959184 42.4234693877551 42.4744897959184 42.6020408163265
4 1 2 1
43.0867346938775 43.1632653061224 43.7244897959184 44.3367346938775
1 1 4 1
44.4132653061224 44.9744897959184 45.1020408163265 45.6122448979592
2 3 1 1
46.2244897959184 46.2755102040816 46.8367346938775 47.4744897959184
1 1 1 3
48.0357142857143 48.0867346938775 48.6734693877551 48.7244897959184
2 2 1 4
48.7755102040816 49.4132653061224 49.9744897959184 50.5357142857143
2 2 2 2
50.5867346938775 50.6122448979592 51.1734693877551 51.2244897959184
3 1 1 2
51.2755102040816 51.7857142857143 51.8367346938775 52.4744897959184
2 1 3 6
52.5255102040816 52.984693877551 53.0357142857143 53.0867346938775
1 1 3 5
53.1632653061224 53.7244897959184 53.7755102040816 54.3367346938775
1 3 1 4
54.3622448979592 54.3877551020408 54.4642857142857 54.9744897959184
2 1 1 1
55.0255102040816 55.5357142857143 55.5867346938775 55.6122448979592
2 2 6 1
55.6377551020408 55.6632653061224 55.765306122449 56.2244897959184
1 2 1 3
56.2755102040816 56.3010204081633 56.7857142857143 56.8367346938775
2 1 3 2
56.8877551020408 56.9132653061224 57.4234693877551 57.4744897959184
1 2 1 5
58.0357142857143 58.0867346938775 58.1122448979592 58.1377551020408
5 6 1 4
58.6989795918367 58.7244897959184 58.75 59.234693877551
1 4 2 1
59.2857142857143 59.3367346938775 59.3622448979592 59.3877551020408
3 4 1 1
59.8979591836735 59.9744897959184 60.5357142857143 60.5867346938775
2 4 2 5
60.6122448979592 60.6377551020408 61.0969387755102 61.1479591836735
1 4 2 1
61.2244897959184 61.25 61.7857142857143 61.8367346938775
3 1 2 8
61.8622448979592 61.8877551020408 62.4489795918367 62.4744897959184
1 3 1 1
63.0357142857143 63.0867346938775 63.1377551020408 63.1887755102041
6 7 3 1
63.2142857142857 63.6479591836735 63.6734693877551 63.6989795918367
1 1 1 2
63.7244897959184 63.75 64.234693877551 64.2857142857143
2 2 1 6
64.3367346938775 64.3877551020408 64.4132653061224 64.8469387755102
3 2 1 1
64.8979591836735 64.9744897959184 65 65.0255102040816
4 3 1 1
65.5357142857143 65.5867346938775 65.6377551020408 66.1479591836735
2 3 1 3
66.1989795918367 66.2244897959184 66.25 66.2755102040816
1 3 2 1
66.734693877551 66.7857142857143 66.8367346938775 66.8622448979592
1 4 8 1
66.8877551020408 67.3979591836735 67.4489795918367 67.4744897959184
2 5 2 3
67.5 68.0357142857143 68.0867346938775 68.1122448979592
2 1 9 1
68.1377551020408 68.5969387755102 68.6479591836735 68.6989795918367
2 4 1 7
68.7244897959184 68.75 69.234693877551 69.3367346938775
1 4 1 7
69.3877551020408 69.4387755102041 69.9489795918367 69.9744897959184
2 1 1 5
70.4336734693878 70.5357142857143 70.5867346938775 70.6377551020408
1 2 10 5
70.6632653061224 71.0969387755102 71.2244897959184 71.25
1 1 1 2
71.2755102040816 71.7857142857143 71.8367346938775 71.9132653061224
1 3 10 1
72.3979591836735 72.4489795918367 72.4744897959184 72.5
2 1 2 3
72.6275510204082 72.984693877551 73.0357142857143 73.0867346938775
1 1 10 20
73.1377551020408 73.1887755102041 73.2142857142857 73.5969387755102
7 1 1 1
73.6479591836735 73.6734693877551 73.6989795918367 73.7244897959184
6 1 3 2
73.75 73.7755102040816 73.8265306122449 74.234693877551
7 2 1 1
74.2857142857143 74.3367346938775 74.3877551020408 74.8469387755102
8 17 6 1
74.8979591836735 74.9489795918367 74.9744897959184 75
2 1 2 6
75.0255102040816 75.484693877551 75.5357142857143 75.5612244897959
1 1 10 1
75.5867346938775 75.6377551020408 75.6887755102041 75.9948979591837
18 12 1 1
76.0969387755102 76.1479591836735 76.1989795918367 76.2244897959184
1 2 7 3
76.25 76.2755102040816 76.6836734693878 76.7857142857143
8 2 1 5
76.8367346938775 76.8622448979592 76.8877551020408 76.9387755102041
17 1 13 2
76.9642857142857 77.2959183673469 77.3469387755102 77.3979591836735
1 1 2 7
77.4489795918367 77.4744897959184 77.5 78.0357142857143
3 1 9 10
78.0867346938775 78.1377551020408 78.5969387755102 78.6479591836735
33 15 1 6
78.6734693877551 78.6989795918367 78.7244897959184 78.75
1 9 3 15
78.7755102040816 79.2857142857143 79.3367346938775 79.3877551020408
1 14 31 13
79.8469387755102 79.8979591836735 79.9489795918367 79.9744897959184
1 5 10 1
80 80.0255102040816 80.0765306122449 80.484693877551
9 1 1 1
80.5357142857143 80.5867346938775 80.6377551020408 80.6632653061224
20 56 20 1
80.6887755102041 81.0969387755102 81.1479591836735 81.1989795918367
3 1 9 14
81.2244897959184 81.25 81.3265306122449 81.734693877551
2 16 1 1
81.7857142857143 81.8367346938775 81.8877551020408 81.9387755102041
17 53 24 2
82.3469387755102 82.3979591836735 82.4489795918367 82.5
1 15 20 17
82.5255102040816 82.984693877551 83.0357142857143 83.0867346938775
1 1 11 53
83.1377551020408 83.1887755102041 83.5969387755102 83.6479591836735
28 3 3 23
83.6989795918367 83.75 83.7755102040816 83.8265306122449
19 27 1 1
83.8775510204082 84.2857142857143 84.3367346938775 84.3877551020408
1 6 68 45
84.4387755102041 84.8469387755102 84.8979591836735 84.9489795918367
6 4 42 39
85 85.0255102040816 85.484693877551 85.5357142857143
36 1 1 5
85.5867346938775 85.6377551020408 85.6887755102041 86.0969387755102
60 47 1 5
86.1479591836735 86.1989795918367 86.2244897959184 86.25
40 64 1 55
86.734693877551 86.7857142857143 86.8367346938775 86.8877551020408
1 7 39 56
86.9387755102041 87.3469387755102 87.3979591836735 87.4489795918367
6 3 42 103
87.5 87.5255102040816 88.0357142857143 88.0867346938775
89 1 3 27
88.1377551020408 88.1887755102041 88.5969387755102 88.6479591836735
54 3 1 45
88.6989795918367 88.75 89.2857142857143 89.3367346938775
130 109 2 23
89.3877551020408 89.4387755102041 89.8469387755102 89.8979591836735
50 4 8 45
89.9489795918367 90 90.5867346938775 90.6377551020408
169 207 19 41
90.6887755102041 91.0969387755102 91.1479591836735 91.1989795918367
7 2 43 203
91.25 91.3775510204082 91.8367346938775 91.8877551020408
268 1 9 36
91.9387755102041 92.3469387755102 92.3979591836735 92.4489795918367
3 1 25 192
92.5 93.1377551020408 93.1887755102041 93.6479591836735
391 29 3 16
93.6989795918367 93.75 94.3877551020408 94.4387755102041
189 450 6 4
94.8979591836735 94.9489795918367 95 95.6887755102041
13 157 599 3
96.1989795918367 96.25 97.4489795918367 97.5
114 551 48 576
98.75 100
518 429
Code
data_scale2$ SF_ph <- log10 (data_scale$ SF_ph)
table (data_scale2$ SF_ph)
1.13991119808611 1.17756594462169 1.17903563970246 1.21085336531489
1 1 1 1
1.34070709681079 1.36384213065636 1.37659557672604 1.37752554385206
1 1 1 1
1.40844764578854 1.42914230416503 1.43892963627752 1.44889552749531
1 1 1 1
1.46778925660933 1.48589517902717 1.49378513888608 1.50397124267296
1 1 1 2
1.5115423366332 1.51324478680192 1.52065728528638 1.52892981125237
1 1 1 1
1.53672588265145 1.55159935126669 1.55222110438921 1.57314404682283
1 1 1 1
1.587985704539 1.59507985904269 1.60178292944813 1.60233687656648
3 2 1 1
1.60261358538878 1.60865699638119 1.60920225003964 1.61461746336559
1 2 1 1
1.61515528941811 1.62760618219906 1.62812817082188 1.62943040412713
4 1 2 1
1.63434358255055 1.63511429168255 1.64072475056672 1.64676370509219
1 1 4 1
1.64751270409687 1.65296624527886 1.6541961936566 1.65908144743944
2 3 1 1
1.66487212632034 1.66535121570362 1.67058660984477 1.67646030611031
1 1 1 3
1.68156425299621 1.68202528752135 1.68729230334762 1.68774730022727
2 2 1 4
1.68820182091962 1.69384355369865 1.69874836897428 1.70359840851809
2 2 2 2
1.70403664718485 1.7042556007977 1.70904486166394 1.70947764145252
3 1 1 2
1.70990999040003 1.71420997089276 1.71463763659142 1.71994822467427
2 1 3 6
1.72037027959757 1.72415042951464 1.72456842231101 1.72498601319117
1 1 3 5
1.72561164760703 1.73017229982901 1.73058453952005 1.73509353641828
1 3 1 4
1.73529738269374 1.73550113333408 1.73611181234059 1.74016120747629
2 1 1 1
1.74056407808209 1.74457236202064 1.7449711632258 1.74517042658415
2 2 6 1
1.74536959855824 1.74556867923187 1.74636409059323 1.74992552315929
1 2 1 3
1.75031944108371 1.7505162661412 1.75423909297823 1.75462911948123
2 1 3 2
1.7550187960277 1.75521350326338 1.75908942798006 1.75947512470337
1 2 1 5
1.76369533397267 1.76407696359469 1.76426765272262 1.76445825815992
5 6 1 4
1.76863055164819 1.76881925227332 1.76900787094377 1.7725761483821
1 4 2 1
1.77295005669784 1.77332364337197 1.77351031626627 1.77369690895739
3 4 1 1
1.77741202555512 1.77796656210448 1.78201167119688 1.78237754694043
2 4 2 5
1.7825603692887 1.78274311470772 1.78601945073012 1.7863819670132
1 4 2 1
1.78692517469115 1.78710609303657 1.79088807178658 1.79124654847379
3 1 2 8
1.79142567591783 1.7916047295101 1.79552534645307 1.79570271810426
1 3 1 1
1.79958667838162 1.79993804934084 1.80028913624913 1.80063993956538
6 7 3 1
1.80081523501959 1.80378448293895 1.80395851398993 1.80413247533089
1 1 1 2
1.80430636701766 1.80448018910599 1.80776965875139 1.80811447376109
2 2 1 6
1.80845901521661 1.80880328355164 1.80897531543422 1.81188947919753
3 2 1 1
1.81223103995592 1.81274287794316 1.81291335664286 1.81308376844881
4 3 1 1
1.81647803724589 1.8168160096224 1.81715371918989 1.82051644974889
2 3 1 3
1.82085129516402 1.82101862110787 1.82118588260885 1.82135307971655
1 3 2 1
1.82435167263177 1.82468357519428 1.82501522429929 1.82518095392614
1 4 8 1
1.82534662033361 1.82864674625805 1.82897538379315 1.82913960935075
2 5 2 3
1.82930377283102 1.83273694866942 1.83306250676705 1.83322519434412
2 1 9 1
1.83338782100092 1.83630473520284 1.8366276307433 1.83695028639105
2 4 1 7
1.83711152436651 1.8372727025023 1.84032377630326 1.84096338537602
1 4 1 7
1.84128283701374 1.84160205384686 1.84478138343304 1.84493974058407
2 1 1 5
1.84778033961881 1.84840906862026 1.84872309212049 1.84903688872512
1 2 10 5
1.84919370204399 1.85185090169285 1.85262934693067 1.85278486868055
1 1 1 2
1.85294033475771 1.85603802607827 1.85634658344962 1.85680900885115
1 3 10 1
1.859726324101 1.86003227302658 1.86018516670248 1.86033800657099
2 1 2 3
1.8611014001265 1.86323179078481 1.86353528100114 1.86383855928295
1 1 10 20
1.86414162592603 1.86444448122554 1.86459582971354 1.86685975047129
7 1 1 1
1.86716071686026 1.86731112187714 1.86746147482374 1.86761177573609
6 1 3 2
1.8677620246502 1.86791222160204 1.86821245976256 1.87060692196545
7 2 1 1
1.87090530362054 1.87120348041351 1.87150145262548 1.87417404248681
8 17 6 1
1.87446998422358 1.87476572443378 1.87491351905216 1.8750612633917
2 1 2 6
1.87520895748661 1.87785889814018 1.87815234036884 1.87829898716473
1 1 10 1
1.87844558445959 1.87873863067982 1.87903147929638 1.88078443619459
18 12 1 1
1.88136718634161 1.88165826844493 1.88194915558367 1.8820945261229
1 2 7 3
1.88223984801882 1.88238512130397 1.88470290923043 1.88528042857339
8 2 1 5
1.88556890050821 1.8857130646529 1.88585718095816 1.88614527017728
17 1 13 2
1.88628924315453 1.88815656148185 1.88844312993956 1.88872950943025
1 1 2 7
1.88901570020299 1.88915872489781 1.88930170250631 1.89229340996422
3 1 9 10
1.89257726257688 1.89286092978612 1.89540563129648 1.89568745770605
33 15 1 6
1.89582830235846 1.8959691013488 1.89610985470667 1.89625056246164
1 9 3 15
1.89639122464324 1.89919494310842 1.89947432200638 1.89975352129719
1 14 31 13
1.90225827052599 1.90253568636545 1.90281292511211 1.90295147814628
1 5 10 1
1.90308998699194 1.90322845167729 1.90350524867959 1.9057132965597
9 1 1 1
1.90598851487176 1.90626355888469 1.90653842881912 1.90667579857573
20 56 20 1
1.90681312489527 1.90900446089432 1.90927760208691 1.90955057160055
3 1 9 14
1.90968699204517 1.90982336965091 1.91023224570362 1.91240644039174
2 16 1 1
1.91267745099767 1.91294829259167 1.91321896538441 1.91348946958619
17 53 24 2
1.91564745902958 1.91591645531065 1.91618528508209 1.91645394854993
1 15 20 17
1.91658821798426 1.9189979962614 1.91926492588375 1.91953169154442
1 1 11 53
1.91979829344469 1.9200647317855 1.92219037436192 1.92245534964891
28 3 3 23
1.92272016336559 1.92298481570888 1.92311708142695 1.92338149207859
19 27 1 1
1.92364574184756 1.92575397162789 1.92601678221497 1.92627943386005
1 6 68 45
1.92654192675526 1.92863617786304 1.92889725059823 1.92915816648586
6 4 42 39
1.92941892571429 1.92954924664007 1.93188836081481 1.93214748640836
36 1 1 5
1.93240645748455 1.93266527422756 1.93292393682121 1.93498771014659
60 47 1 5
1.93524499361495 1.93550212475444 1.9356306332572 1.93575910374531
40 64 1 55
1.9381928500218 1.93844824225609 1.93870348439209 1.93895857660613
1 7 39 56
1.93921351907421 1.94124768898466 1.94150129160903 1.9417547462307
6 3 42 103
1.94200805302231 1.94213465103572 1.94465889227103 1.944910511329
89 1 3 27
1.94516198468976 1.94541331252195 1.94741871629031 1.94766874190568
54 3 1 45
1.9479186236628 1.94816836172713 1.95078197732982 1.95103007472697
130 109 2 23
1.95127803047559 1.95152584473732 1.9535032846108 1.95374983271955
50 4 8 45
1.95399624094285 1.95424250943932 1.95706460528116 1.95730914046887
169 207 19 41
1.95755353804533 1.95950378317232 1.95974694918198 1.95998997911664
7 2 43 203
1.96023287312851 1.96083951449256 1.96301643374683 1.96325764146306
268 1 9 36
1.96349871528657 1.96542250351271 1.96566237895758 1.96590212198432
3 1 25 192
1.96614173273903 1.96912576592927 1.96936360519146 1.97149831748353
391 29 3 16
1.97173486132484 1.97197127639976 1.97491565704654 1.97515034739643
189 450 6 4
1.97725687286144 1.97749030177429 1.97772360528885 1.98086099713027
13 157 599 3
1.98317046538516 1.98340073818054 1.98877729589125 1.98900461569854
114 551 48 576
1.9945371042985 2
518 429
Code
SF_ph_preF_factor_fix <- lm (SF_ph ~ Age + Weight + Height + as.factor (Intervention_months_factor),
data = data_scale2,
subset = data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 0 )
qqnorm (resid (SF_ph_preF_factor_fix), col = "grey" )
qqline (resid (SF_ph_preF_factor_fix), col = "dodgerblue" , lwd = 2 )
Code
shapiro.test (resid (SF_ph_preF_factor_fix))
Shapiro-Wilk normality test
data: resid(SF_ph_preF_factor_fix)
W = 0.67074, p-value < 2.2e-16
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see chunk below.
#multicollinearity
car:: vif (SF_ph_preF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.065418 1 1.032191
Weight 1.206128 1 1.098239
Height 1.140722 1 1.068046
as.factor(Intervention_months_factor) 1.004341 4 1.000542
Postmenopausal females:
Code
### LINEARITY (Linearity and constant variance assumptions)
#Fitted versus Residuals Plot: Linearity bad, constant variance looks bad (heteroscedasticity)
plot (SF_ph_postF, which = 1 )
Code
### HOMOGEINITY OF VARIANCE
# Display scale-location plot, also known as the spread-location plot
plot (SF_ph_postF, which = 3 ) # Linearity bad, constant variance bad
Code
# formal test in the form of Breusch-Pagan
bptest (SF_ph_postF) # reject the null of homoscedasticity. The constant variance assumption is violated.
studentized Breusch-Pagan test
data: SF_ph_postF
BP = 24.922, df = 7, p-value = 0.0007833
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot (SF_ph_postF, 2 ) #suspect
Code
# formal test in the form of Shapiro-Wilk
shapiro.test (resid (SF_ph_postF)) #reject for the data sampled from normal
Shapiro-Wilk normality test
data: resid(SF_ph_postF)
W = 0.80407, p-value < 2.2e-16
Code
### OUTLIERS
#plot 3largest
plot (SF_ph_postF, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (SF_ph_postF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 13
.rownames SF_ph Age[,1] Weight…¹ Heigh…² as.fa…³ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 4246 16.2 14.0 11.8 7.24 4 87.0 -70.8 0.0107 10.8
2 4440 29.4 4.98 8.81 4.24 4 87.7 -58.3 0.00984 10.9
3 6837 32.6 6.98 -14.2 -13.8 3 88.2 -55.6 0.0123 10.9
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹Weight[,1], ²Height[,1],
# ³`as.factor(Intervention_months_factor)`
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = SF_ph), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 4 ) # some influential data points
# A tibble: 14 × 13
.rownames SF_ph Age[,1] Weigh…¹ Heigh…² as.fa…³ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 289 33.2 2.98 12.8 1.24 2 84.6 -51.4 0.00668 10.9
2 1423 40.7 4.98 -13.2 -6.76 2 89.1 -48.4 0.00496 10.9
3 2400 41.2 23.0 -8.19 -0.760 0 89.2 -48.0 0.00412 10.9
4 2408 27.5 17.0 6.81 -1.76 0 86.5 -59.0 0.00279 10.9
5 2658 38.7 28.0 -9.19 -19.8 0 88.4 -49.7 0.00639 10.9
6 3087 38.7 12.0 -7.19 -6.76 2 87.8 -49.1 0.00354 10.9
7 3814 23.9 6.98 9.81 -2.76 0 86.1 -62.3 0.00328 10.9
8 4246 16.2 14.0 11.8 7.24 4 87.0 -70.8 0.0107 10.8
9 4440 29.4 4.98 8.81 4.24 4 87.7 -58.3 0.00984 10.9
10 5472 35.6 11.0 14.8 7.24 0 85.6 -50.0 0.00585 10.9
11 5626 42.5 6.98 -1.19 -2.76 0 88.2 -45.7 0.00236 10.9
12 5887 40.0 9.98 17.8 2.24 1 85.2 -45.2 0.00693 10.9
13 6837 32.6 6.98 -14.2 -13.8 3 88.2 -55.6 0.0123 10.9
14 6898 41.2 15.0 -8.19 -10.8 3 87.0 -45.9 0.0107 10.9
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹Weight[,1], ²Height[,1],
# ³`as.factor(Intervention_months_factor)`
Code
#Cook's distance
cooksd = cooks.distance (SF_ph_postF)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations
#exceed this threshold
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# log transform to pass normality.
SF_ph_postF_factor_fix <- lm (SF_ph ~ Age + Weight + Height + as.factor (Intervention_months_factor),
data = data_scale2,
subset = data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 1 )
qqnorm (resid (SF_ph_postF_factor_fix), col = "grey" )
qqline (resid (SF_ph_postF_factor_fix), col = "dodgerblue" , lwd = 2 )
Code
shapiro.test (resid (SF_ph_postF_factor_fix))
Shapiro-Wilk normality test
data: resid(SF_ph_postF_factor_fix)
W = 0.68121, p-value < 2.2e-16
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see chunk below.
#multicollinearity
car:: vif (SF_ph_postF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.033504 1 1.016614
Weight 1.150857 1 1.072780
Height 1.172847 1 1.082981
as.factor(Intervention_months_factor) 1.012156 4 1.001511
Males:
Code
### LINEARITY (Lnearity and constant variance assumptions)
#Fitted versus Residuals Plot: Bad
plot (SF_ph_M, which = 1 )
Code
### HOMOGEINITY OF VARIANCE
# Display scale-location plot, also known as the spread-location plot
plot (SF_ph_M, which = 3 ) # suspect
Code
# formal test in the form of Breusch-Pagan
bptest (SF_ph_M)# small p-value, so we reject the null of homoscedasticity. The constant variance assumption is violated.
studentized Breusch-Pagan test
data: SF_ph_M
BP = 33.594, df = 7, p-value = 2.052e-05
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot (SF_ph_M, 2 ) # suspect
Code
# formal test in the form of Shapiro-Wilk
shapiro.test (resid (SF_ph_M)) # reject for the data sampled from normal
Shapiro-Wilk normality test
data: resid(SF_ph_M)
W = 0.77114, p-value < 2.2e-16
Code
### OUTLIERS
#plot 3largest
plot (SF_ph_M, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (SF_ph_M) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 13
.rownames SF_ph Age[,1] Weight…¹ Heigh…² as.fa…³ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 1657 15.1 20.0 1.81 -4.76 0 89.9 -74.7 0.00149 8.51
2 3057 15.1 13.0 71.8 2.24 4 83.0 -67.9 0.0117 8.53
3 3062 23.1 19.0 23.8 -11.8 2 86.0 -62.9 0.00532 8.54
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹Weight[,1], ²Height[,1],
# ³`as.factor(Intervention_months_factor)`
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = SF_ph), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 ) # some influential data points
# A tibble: 84 × 13
.rownames SF_ph Age[,1] Weigh…¹ Heigh…² as.fa…³ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 407 56.8 25.0 -3.19 3.24 2 90.7 -33.9 0.00263 8.59
2 437 40.6 -14.0 -3.19 12.2 2 92.9 -52.3 0.00252 8.56
3 440 49.4 13.0 -7.19 -3.76 2 90.7 -41.2 0.00256 8.58
4 581 63.1 -12.0 -7.19 1.24 0 92.6 -29.5 0.00124 8.59
5 635 47.5 20.0 9.81 1.24 0 89.7 -42.2 0.00103 8.58
6 662 40.0 24.0 16.8 1.24 0 88.7 -48.7 0.00138 8.57
7 755 26.9 -20.0 7.81 6.24 0 91.7 -64.8 0.00140 8.54
8 759 44.4 12.0 -6.19 3.24 0 91.9 -47.5 0.00103 8.57
9 823 58.0 16.0 46.8 7.24 0 86.3 -28.3 0.00388 8.59
10 825 59.3 27.0 -8.19 -9.76 0 90.2 -30.9 0.00258 8.59
# … with 74 more rows, 3 more variables: .cooksd <dbl>, .std.resid <dbl>,
# index <int>, and abbreviated variable names ¹Weight[,1], ²Height[,1],
# ³`as.factor(Intervention_months_factor)`
Code
#Cook's distance
cooksd = cooks.distance (SF_ph_M)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations
#exceed this threshold
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# log transform to pass normality.
SF_ph_Mfactor_fix <- lm (SF_ph~ Age + Weight + Height + as.factor (Intervention_months_factor),
data = data_scale2,
subset = data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 1 )
qqnorm (resid (SF_ph_Mfactor_fix), col = "grey" )
qqline (resid (SF_ph_Mfactor_fix), col = "dodgerblue" , lwd = 2 )
Code
shapiro.test (resid (SF_ph_Mfactor_fix))
Shapiro-Wilk normality test
data: resid(SF_ph_Mfactor_fix)
W = 0.68121, p-value < 2.2e-16
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see chunk below.
#multicollinearity
car:: vif (SF_ph_M) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.092442 1 1.045200
Weight 1.274162 1 1.128788
Height 1.251161 1 1.118553
as.factor(Intervention_months_factor) 1.007564 4 1.000942
We conclude that the assumptions were violated and thus we use robust models.
Robust models
Code
SF_ph_preF_robust <- lmrob (SF_ph ~ Age + Weight + Height + as.factor (Intervention_months_factor), data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 0 )
SF_ph_postF_robust <- lmrob (SF_ph ~ Age + Weight + Height + as.factor (Intervention_months_factor), data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 1 )
SF_ph_Mrobust <- lmrob (SF_ph ~ Age + Weight + Height + as.factor (Intervention_months_factor), data = data_scale, subset = data_scale$ Gender == "Male" )
pl <- c (
` (Intercept) ` = "Intercept" ,
` as.factor(Intervention_months_factor)1 ` = "6-12 months" ,
` as.factor(Intervention_months_factor)2 ` = "12-18 months" ,
` as.factor(Intervention_months_factor)3 ` = "18-24 months" ,
` as.factor(Intervention_months_factor)4 ` = "24+ months"
)
tab_model (SF_ph_preF_robust, SF_ph_postF_robust, SF_ph_Mrobust, pred.labels = pl, dv.labels = c ("Pre-menopausal women" , "Post-menopausal women" , "Men" ), show.reflvl = T, title = "Physical health (Sf-36), higher score indicates better health (robust regression)" , digits = 3 )
Physical health (Sf-36), higher score indicates better health (robust regression)
Pre-menopausal women
Post-menopausal women
Men
Predictors
Estimates
CI
p
Estimates
CI
p
Estimates
CI
p
Intercept
92.473
91.820 – 93.126
<0.001
91.859
90.882 – 92.836
<0.001
93.180
92.842 – 93.519
<0.001
Age
0.056
0.021 – 0.090
0.001
-0.053
-0.106 – -0.000
0.049
-0.026
-0.038 – -0.013
<0.001
Weight
-0.098
-0.125 – -0.071
<0.001
-0.107
-0.140 – -0.073
<0.001
-0.067
-0.084 – -0.049
<0.001
Height
0.067
0.023 – 0.111
0.003
0.036
-0.023 – 0.095
0.228
0.056
0.026 – 0.086
<0.001
6-12 months
-0.042
-0.760 – 0.676
0.909
-0.331
-1.320 – 0.659
0.512
-0.425
-0.975 – 0.124
0.129
12-18 months
0.324
-0.391 – 1.038
0.375
-1.117
-2.143 – -0.092
0.033
-0.466
-1.001 – 0.069
0.088
18-24 months
0.106
-0.788 – 1.001
0.816
-1.014
-2.572 – 0.543
0.201
-0.352
-1.069 – 0.365
0.336
24+ months
-0.640
-1.471 – 0.192
0.131
0.777
-0.429 – 1.983
0.207
-0.119
-0.640 – 0.402
0.653
Observations
2398
1468
3582
R2 / R2 adjusted
0.032 / 0.029
0.040 / 0.035
0.032 / 0.030
Mental health
Coded using https://meetinstrumentenzorg.nl/wp-content/uploads/instrumenten/SF-36-RAND-36-form.pdf
Predict by ferritin
Code
res <- cor.test (data_scale$ SF_mh, data_scale$ Ferritin, method = "pearson" , use = "complete.obs" )
ggplot (data_scale, mapping = aes (x = SF_ph, y = Ferritin)) +
geom_point () +
theme_bw () +
geom_text (x = 75 , y = 1000 , label= paste ('r(' ,res$ parameter,') = ' , round (res$ estimate,3 ),', p = ' , round (res$ p.value, 3 ), sep = '' )) +
labs (x= "Mental health (SF-36), higher is better" , y= "Ferritin" )
Code
# plot for cutoffs
data_scale <- data_scale %>% mutate (Fergroup = factor (case_when (Ferritin < 15 ~ 1 , Ferritin >= 15 & Ferritin <= 30 ~ 2 , Ferritin > 30 ~ 3 )) %>% fct_recode ("Ferritin < 15" = "1" , "Ferritin 15-30" = "2" , "Ferritin > 30" = "3" ))
ggplot (data_scale, aes (x= Fergroup, y= SF_mh)) +
geom_boxplot () +
theme_bw () +
labs (x= "Ferritin cut-off groups" , y= "Mental health (SF-36), higher is better" )
Models
Code
SF_mh_preF<- lm (SF_mh ~ Age + Weight + Height + as.factor (Intervention_months_factor) , data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 0 )
SF_mh_postF <- lm (SF_mh ~ Age + Weight + Height + as.factor (Intervention_months_factor) , data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 1 )
SF_mh_M <- lm (SF_mh ~ Age + Weight + Height + as.factor (Intervention_months_factor), data = data_scale, subset = data_scale$ Gender == "Male" )
pl <- c (
` (Intercept) ` = "Intercept" ,
` as.factor(Intervention_months_factor)1 ` = "6-12 months" ,
` as.factor(Intervention_months_factor)2 ` = "12-18 months" ,
` as.factor(Intervention_months_factor)3 ` = "18-24 months" ,
` as.factor(Intervention_months_factor)4 ` = "24+ months"
)
tab_model (SF_mh_preF, SF_mh_postF, SF_mh_M, pred.labels = pl, dv.labels = c ("Pre-menopausal women" , "Post-menopausal women" , "Men" ), title = "Mental health (Sf-36), higher score indicates better health" , show.reflvl = T, digits= 3 )
Mental health (Sf-36), higher score indicates better health
Pre-menopausal women
Post-menopausal women
Men
Predictors
Estimates
CI
p
Estimates
CI
p
Estimates
CI
p
Intercept
80.147
78.628 – 81.666
<0.001
77.548
75.935 – 79.161
<0.001
81.271
80.582 – 81.961
<0.001
Age
0.266
0.185 – 0.348
<0.001
0.268
0.176 – 0.361
<0.001
0.167
0.140 – 0.194
<0.001
Weight
-0.054
-0.109 – 0.001
0.054
-0.087
-0.144 – -0.031
0.003
-0.026
-0.061 – 0.009
0.142
Height
0.094
-0.007 – 0.194
0.068
0.102
-0.002 – 0.205
0.055
0.075
0.013 – 0.136
0.017
6-12 months
-1.196
-2.917 – 0.525
0.173
0.540
-1.203 – 2.283
0.543
-0.525
-1.694 – 0.644
0.378
12-18 months
1.321
-0.375 – 3.017
0.127
0.315
-1.330 – 1.960
0.707
-0.683
-1.799 – 0.433
0.230
18-24 months
-2.222
-4.358 – -0.087
0.041
-0.597
-3.168 – 1.974
0.649
-0.839
-2.392 – 0.713
0.289
24+ months
-0.744
-2.590 – 1.102
0.429
0.322
-1.804 – 2.449
0.766
-0.556
-1.750 – 0.638
0.361
Observations
2404
1476
3595
R2 / R2 adjusted
0.023 / 0.020
0.028 / 0.024
0.042 / 0.040
Assumptions
Premenopausal females:
Code
### LINEARITY (Linearity and constant variance assumptions)
#Fitted versus Residuals Plot: Linearity bad, constant variance meh (heteroscedasticity)
plot (SF_mh_preF, which = 1 )
Code
### HOMOGEINITY OF VARIANCE
# Display scale-location plot (also known as spread-location plot)
plot (SF_mh_preF, which = 3 ) # Linearity bad, constant variance meh
Code
# formal test in the form of Breusch-Pagan
bptest (SF_mh_preF) # reject the null of homoscedasticity. The constant variance assumption is violated.
studentized Breusch-Pagan test
data: SF_mh_preF
BP = 26.608, df = 7, p-value = 0.0003921
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot (SF_mh_preF , 2 ) #suspect
Code
# formal test in the form of Shapiro-Wilk
shapiro.test (resid (SF_mh_preF)) #reject the data sampled from normal
Shapiro-Wilk normality test
data: resid(SF_mh_preF)
W = 0.82854, p-value < 2.2e-16
Code
### OUTLIERS
#plot 3largest
plot (SF_mh_preF, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (SF_mh_preF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 13
.rownames SF_mh Age[,1] Weight…¹ Heigh…² as.fa…³ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 3925 18.2 -2.02 6.81 7.24 3 77.7 -59.4 0.00698 14.9
2 4065 11.2 -17.0 35.8 -10.8 1 71.5 -60.2 0.0109 14.9
3 7349 33.4 -13.0 49.8 -0.760 1 72.7 -39.3 0.0130 14.9
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹Weight[,1], ²Height[,1],
# ³`as.factor(Intervention_months_factor)`
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = SF_mh), almha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 4 ) # some influential data points
# A tibble: 5 × 13
.rownames SF_mh Age[,1] Weight…¹ Heigh…² as.fa…³ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 1659 15.6 -12.0 26.8 1.24 0 75.6 -60.0 0.00459 14.9
2 2029 11.9 -23.0 -13.2 -1.76 0 74.6 -62.7 0.00187 14.9
3 4065 11.2 -17.0 35.8 -10.8 1 71.5 -60.2 0.0109 14.9
4 4595 14.9 -16.0 6.81 1.24 0 75.6 -60.8 0.00197 14.9
5 6838 8.38 -20.0 -11.2 -1.76 3 73.0 -64.7 0.00478 14.9
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹Weight[,1], ²Height[,1],
# ³`as.factor(Intervention_months_factor)`
Code
#Cook's distance
cooksd = cooks.distance (SF_mh_preF)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# log transform to pass normality.
data_scale2 <- data_scale
table (data_scale2$ SF_mh)
3 8.375 11.25 11.875
1 1 1 1
12.5 13.75 14.875 15.25
1 1 1 1
15.625 16.25 16.875 17.125
1 1 1 2
17.25 17.5 18.25 18.375
2 1 1 1
19.25 19.5 20 20.5
2 1 1 2
20.625 21.125 22.25 22.3333333333333
1 1 1 1
22.625 23 24.125 24.25
3 1 1 1
24.5 24.625 24.75 24.875
1 1 1 1
25.5 25.625 25.75 25.875
2 1 3 1
25.9583333333333 26.25 26.375 26.5
1 2 2 2
26.75 27.125 27.25 27.625
1 2 2 2
27.75 28 28.2083333333333 28.25
2 2 1 1
28.3333333333333 28.375 28.5 28.625
1 1 2 2
29.25 29.625 29.7083333333333 29.9583333333333
1 2 1 2
30.125 30.2083333333333 30.25 30.5
2 1 1 2
30.75 31 31.125 31.25
1 2 1 2
31.5 31.625 31.875 32.125
3 1 4 3
32.25 32.3333333333333 32.375 32.5
2 1 1 1
32.75 32.875 33.125 33.375
3 1 2 2
33.5 33.5833333333333 33.625 33.7083333333333
3 1 2 1
33.75 33.875 33.9583333333333 34
4 6 1 1
34.125 34.25 34.375 34.5
1 1 5 1
34.625 34.7083333333333 34.75 34.875
2 2 1 1
35 35.125 35.2083333333333 35.25
3 4 1 2
35.3333333333333 35.375 35.5 35.625
1 2 2 1
35.75 36.0833333333333 36.125 36.2083333333333
1 1 2 1
36.25 36.375 36.625 36.75
2 1 7 1
36.875 36.9583333333333 37.0833333333333 37.125
2 1 1 1
37.4583333333333 37.5 37.625 37.875
1 1 4 1
37.9583333333333 38.25 38.375 38.4583333333333
1 1 2 4
38.5 38.5833333333333 38.625 38.75
2 1 1 1
38.8333333333333 38.875 38.9583333333333 39
1 2 1 1
39.125 39.2083333333333 39.25 39.375
1 1 1 1
39.5 39.5833333333333 39.625 39.75
2 1 2 2
39.875 40.0833333333333 40.125 40.2083333333333
1 1 1 1
40.3333333333333 40.375 40.5 40.5833333333333
2 1 1 1
40.625 40.7083333333333 40.75 40.875
1 1 3 2
41 41.125 41.1666666666667 41.2083333333333
2 3 1 1
41.25 41.4583333333333 41.5 41.5833333333333
1 1 1 1
41.7083333333333 41.75 41.8333333333333 41.9583333333333
3 2 2 1
42 42.0833333333333 42.125 42.2083333333333
1 3 3 2
42.25 42.3333333333333 42.4583333333333 42.5
1 1 3 1
42.7083333333333 42.75 42.875 42.9583333333333
1 9 4 1
43 43.125 43.25 43.3333333333333
1 2 2 1
43.375 43.5 43.625 43.7083333333333
2 2 1 1
43.75 43.9583333333333 44 44.125
4 2 3 2
44.1666666666667 44.2083333333333 44.25 44.3333333333333
1 2 1 2
44.375 44.4583333333333 44.625 44.7083333333333
2 2 1 1
44.8333333333333 45.2083333333333 45.25 45.3333333333333
1 1 2 1
45.375 45.4583333333333 45.5416666666667 45.5833333333333
4 1 1 3
45.7083333333333 45.875 46.5 46.5416666666667
2 1 2 1
46.5833333333333 46.625 46.7083333333333 46.75
3 1 2 2
46.8333333333333 46.9583333333333 47.0416666666667 47.125
3 1 1 1
47.2083333333333 47.25 47.4583333333333 47.5
1 2 2 3
47.5833333333333 47.6666666666667 47.7083333333333 47.8333333333333
1 1 1 2
47.875 47.9583333333333 48 48.125
3 1 2 1
48.2083333333333 48.25 48.2916666666667 48.3333333333333
6 1 1 1
48.4583333333333 48.5 48.5416666666667 48.5833333333333
2 5 1 1
48.8333333333333 48.9583333333333 49.0833333333333 49.125
5 2 1 2
49.2083333333333 49.25 49.375 49.4583333333333
1 2 3 4
49.5 49.625 49.7083333333333 49.875
1 2 2 1
49.9583333333333 50.0833333333333 50.1666666666667 50.2083333333333
1 3 1 1
50.2916666666667 50.3333333333333 50.375 50.4583333333333
1 1 3 1
50.5833333333333 50.625 50.7083333333333 50.7916666666667
1 1 1 2
50.8333333333333 50.875 50.9166666666667 50.9583333333333
1 1 2 2
51.0416666666667 51.0833333333333 51.2083333333333 51.25
1 1 2 3
51.3333333333333 51.375 51.4583333333333 51.7083333333333
2 1 2 4
51.75 51.7916666666667 51.8333333333333 51.9166666666667
2 4 3 1
52 52.0416666666667 52.0833333333333 52.125
1 1 2 1
52.1666666666667 52.2083333333333 52.25 52.3333333333333
1 1 1 1
52.375 52.4583333333333 52.5833333333333 52.625
1 2 3 1
52.6666666666667 52.7083333333333 52.75 52.7916666666667
1 1 2 3
52.875 52.9583333333333 53 53.0416666666667
2 1 1 4
53.0833333333333 53.1666666666667 53.2083333333333 53.25
3 1 4 1
53.3333333333333 53.4583333333333 53.5 53.5416666666667
1 2 4 1
53.5833333333333 53.7083333333333 53.75 53.7916666666667
4 1 1 1
53.8333333333333 53.875 53.9583333333333 54.125
2 1 2 1
54.1666666666667 54.2083333333333 54.2916666666667 54.3333333333333
1 2 4 1
54.4166666666667 54.4583333333333 54.5833333333333 54.625
3 3 2 1
54.6666666666667 54.75 54.7916666666667 54.8333333333333
1 1 2 3
54.875 54.9166666666667 54.9583333333333 55
2 3 1 2
55.0833333333333 55.1666666666667 55.2083333333333 55.25
3 1 1 2
55.2916666666667 55.4166666666667 55.4583333333333 55.7083333333333
2 1 2 3
55.75 55.7916666666667 55.8333333333333 55.9166666666667
1 5 2 1
56 56.0416666666667 56.0833333333333 56.1666666666667
1 1 1 2
56.2083333333333 56.25 56.2916666666667 56.3333333333333
2 1 1 1
56.375 56.4583333333333 56.5416666666667 56.5833333333333
1 3 4 3
56.625 56.6666666666667 56.7083333333333 56.75
1 1 4 1
56.8333333333333 56.875 56.9166666666667 56.9583333333333
2 1 2 3
57 57.0416666666667 57.0833333333333 57.1666666666667
1 3 2 2
57.2083333333333 57.3333333333333 57.4166666666667 57.4583333333333
2 5 2 2
57.5416666666667 57.5833333333333 57.6666666666667 57.7083333333333
3 1 1 2
57.7916666666667 57.8333333333333 57.9166666666667 57.9583333333333
2 1 1 1
58 58.0833333333333 58.125 58.1666666666667
3 3 1 5
58.2916666666667 58.3333333333333 58.375 58.4166666666667
2 2 1 1
58.4583333333333 58.5416666666667 58.5833333333333 58.6666666666667
1 2 1 1
58.7083333333333 58.75 58.7916666666667 58.8333333333333
2 1 5 5
58.875 58.9166666666667 58.9583333333333 59
4 2 2 1
59.0416666666667 59.0833333333333 59.25 59.2916666666667
1 1 1 2
59.3333333333333 59.4583333333333 59.5 59.5416666666667
1 2 1 4
59.5833333333333 59.6666666666667 59.7916666666667 59.8333333333333
2 3 4 3
59.9166666666667 59.9583333333333 60 60.0833333333333
1 1 1 2
60.1666666666667 60.2083333333333 60.2916666666667 60.375
1 1 1 2
60.4583333333333 60.5 60.5416666666667 60.5833333333333
2 4 1 1
60.6666666666667 60.75 60.7916666666667 60.8333333333333
2 1 1 2
60.875 60.9583333333333 61.0416666666667 61.0833333333333
2 1 1 2
61.125 61.1666666666667 61.2083333333333 61.25
1 3 1 1
61.2916666666667 61.3333333333333 61.4166666666667 61.4583333333333
2 1 1 3
61.5416666666667 61.5833333333333 61.6666666666667 61.7083333333333
5 2 4 2
61.7916666666667 61.875 61.9166666666667 61.9583333333333
4 3 4 2
62 62.0416666666667 62.0833333333333 62.125
3 3 2 2
62.1666666666667 62.2083333333333 62.25 62.2916666666667
3 1 1 2
62.375 62.4166666666667 62.5 62.5416666666667
4 1 2 4
62.5833333333333 62.625 62.6666666666667 62.7916666666667
2 4 3 1
62.8333333333333 62.875 62.9166666666667 63
1 2 3 1
63.0416666666667 63.0833333333333 63.1666666666667 63.2083333333333
2 3 6 1
63.25 63.375 63.4166666666667 63.4583333333333
1 3 1 1
63.5 63.5416666666667 63.5833333333333 63.625
1 1 1 3
63.6666666666667 63.75 63.7916666666667 63.875
2 3 2 1
63.9166666666667 64 64.0416666666667 64.0833333333333
3 1 4 1
64.1666666666667 64.2083333333333 64.25 64.2916666666667
4 1 4 2
64.375 64.4166666666667 64.4583333333333 64.5
2 1 1 2
64.5416666666667 64.625 64.6666666666667 64.75
3 3 4 3
64.7916666666667 64.9166666666667 64.9583333333333 65
1 4 1 2
65.0416666666667 65.0833333333333 65.125 65.1666666666667
3 1 1 5
65.2083333333333 65.25 65.2916666666667 65.3333333333333
1 2 3 2
65.375 65.4166666666667 65.4583333333333 65.5
3 6 2 5
65.5833333333333 65.625 65.6666666666667 65.75
1 8 5 3
65.7916666666667 65.8333333333333 65.875 65.9166666666667
2 2 5 5
66 66.0416666666667 66.125 66.1666666666667
4 4 1 6
66.25 66.2916666666667 66.375 66.4166666666667
5 2 2 3
66.4583333333333 66.5 66.5416666666667 66.5833333333333
1 6 1 1
66.625 66.6666666666667 66.7083333333333 66.75
2 2 3 4
66.7916666666667 66.8333333333333 66.875 66.9166666666667
3 1 4 3
66.9583333333333 67 67.0416666666667 67.125
1 4 3 4
67.1666666666667 67.25 67.2916666666667 67.375
4 1 2 3
67.4166666666667 67.5 67.5416666666667 67.5833333333333
4 3 1 1
67.625 67.6666666666667 67.75 67.7916666666667
6 4 3 1
67.8333333333333 67.875 67.9166666666667 68
2 5 2 4
68.0416666666667 68.125 68.1666666666667 68.25
3 4 3 2
68.2916666666667 68.375 68.4166666666667 68.5
6 3 6 3
68.5416666666667 68.5833333333333 68.625 68.6666666666667
3 1 3 6
68.75 68.7916666666667 68.875 68.9166666666667
5 3 2 3
69 69.0416666666667 69.125 69.1666666666667
5 2 4 1
69.25 69.2916666666667 69.375 69.4166666666667
11 5 9 2
69.5 69.5416666666667 69.5833333333333 69.625
3 4 1 7
69.6666666666667 69.75 69.7916666666667 69.875
6 5 2 7
69.9166666666667 70 70.0416666666667 70.0833333333333
2 4 1 1
70.125 70.1666666666667 70.25 70.2916666666667
5 5 5 3
70.3333333333333 70.375 70.4166666666667 70.5
1 6 4 4
70.5416666666667 70.625 70.75 70.7916666666667
3 6 8 1
70.875 70.9166666666667 71 71.0833333333333
14 1 3 1
71.125 71.1666666666667 71.25 71.375
6 1 11 13
71.4166666666667 71.5 71.5416666666667 71.625
6 8 3 7
71.6666666666667 71.75 71.7916666666667 71.875
3 8 4 7
71.9166666666667 72 72.125 72.1666666666667
3 11 7 3
72.25 72.2916666666667 72.375 72.4166666666667
17 3 9 3
72.5 72.5416666666667 72.625 72.6666666666667
8 4 7 2
72.75 72.875 72.9166666666667 73
6 12 1 9
73.0416666666667 73.125 73.1666666666667 73.25
5 16 2 7
73.375 73.4166666666667 73.5 73.625
18 7 13 9
73.6666666666667 73.75 73.7916666666667 73.875
4 14 4 9
73.9166666666667 74 74.0416666666667 74.125
4 13 2 10
74.1666666666667 74.25 74.375 74.4166666666667
3 15 20 1
74.5 74.625 74.6666666666667 74.75
12 19 5 13
74.7916666666667 74.875 74.9166666666667 75
1 10 2 19
75.0416666666667 75.125 75.1666666666667 75.25
1 8 1 17
75.375 75.4166666666667 75.5 75.625
28 2 16 16
75.6666666666667 75.75 75.7916666666667 75.875
5 19 1 20
75.9166666666667 76 76.0416666666667 76.125
6 14 5 14
76.1666666666667 76.25 76.375 76.5
1 18 15 13
76.5416666666667 76.625 76.6666666666667 76.75
1 20 1 12
76.875 76.9166666666667 77 77.125
17 3 24 10
77.1666666666667 77.25 77.2916666666667 77.375
1 19 2 24
77.4166666666667 77.5 77.625 77.75
2 16 34 20
77.875 77.9166666666667 78 78.125
31 3 26 9
78.1666666666667 78.25 78.375 78.4166666666667
5 18 14 2
78.5 78.625 78.75 78.7916666666667
29 27 33 1
78.875 78.9166666666667 79 79.0416666666667
37 2 32 1
79.125 79.1666666666667 79.25 79.375
16 1 39 17
79.4166666666667 79.5 79.5416666666667 79.625
5 43 1 22
79.6666666666667 79.75 79.875 80
1 57 46 37
80.125 80.1666666666667 80.25 80.375
35 1 31 10
80.4166666666667 80.5 80.625 80.6666666666667
3 52 10 2
80.75 80.7916666666667 80.875 81
41 1 36 55
81.125 81.25 81.375 81.4166666666667
57 36 18 1
81.5 81.625 81.75 81.875
35 10 67 14
82 82.125 82.1666666666667 82.25
95 46 1 47
82.375 82.4166666666667 82.5 82.625
38 1 40 6
82.6666666666667 82.75 82.875 83
1 71 9 127
83.125 83.25 83.375 83.4166666666667
28 76 37 1
83.5 83.625 83.75 83.875
39 16 64 5
84 84.125 84.25 84.375
129 17 152 26
84.5 84.625 84.75 84.875
36 15 43 5
85 85.125 85.1666666666667 85.25
107 3 1 195
85.375 85.5 85.625 85.75
15 119 22 37
85.875 86 86.125 86.25
5 57 2 186
86.375 86.4166666666667 86.5 86.625
3 1 233 14
86.75 86.875 87 87.125
55 8 24 2
87.25 87.375 87.5 87.625
84 1 275 3
87.75 87.875 88 88.125
136 6 33 6
88.25 88.5 88.75 88.875
39 162 238 7
89 89.125 89.25 89.5
61 4 21 64
89.75 89.875 90 90.125
201 2 110 5
90.25 90.5 90.75 91
14 21 102 161
91.125 91.25 91.375 91.5
4 48 1 4
91.75 92 92.25 92.375
38 116 89 1
92.5 92.75 93 93.25
11 2 63 88
93.5 93.625 93.75 94
29 1 1 1
94.25 94.5 94.75 95.5
80 48 1 118
95.75 96 96.5 96.75
5 1 4 18
97.5 97.75 98 98.75
1 2 2 1
99 100
1 4
Code
data_scale2$ SF_mh <- log (data_scale$ SF_mh)
table (data_scale2$ SF_mh)
1.09861228866811 2.12525107771113 2.42036812865043 2.47443534992071
1 1 1 1
2.52572864430826 2.62103882411258 2.69968195143169 2.72457950305342
1 1 1 1
2.74887219562247 2.78809290877575 2.82583323675859 2.84053938414829
1 1 1 2
2.84781214347737 2.86220088092947 2.9041650800285 2.9109910450989
2 1 1 1
2.95751106073379 2.9704144655697 2.99573227355399 3.02042488614436
2 1 1 2
3.02650393222074 3.05045717324324 3.10234200861225 3.10608033072286
1 1 1 1
3.11905548958599 3.13549421592915 3.18324864722505 3.18841661738349
3 1 1 1
3.19867311755068 3.20376218705815 3.2088254890147 3.21386328304466
1 1 1 1
3.23867845216438 3.24356843745857 3.24843462710975 3.25327725158553
2 1 3 1
3.25649268843951 3.26766598903763 3.27241659179623 3.27714473299218
1 2 2 2
3.28653447334202 3.30045581186062 3.30505352110925 3.31872115983792
1 2 2 2
3.32323584019244 3.3322045101752 3.33961744256433 3.34109345759245
2 2 1 1
3.34403896782221 3.34550847580157 3.3499040872746 3.3542804618744
1 1 2 2
3.37587957367787 3.3886185994553 3.39142759006635 3.3998075273731
1 2 1 2
3.40535539181082 3.40811782450673 3.40949618447685 3.41772668361337
2 1 1 2
3.42588999425253 3.43398720448515 3.43801135478487 3.44201937618241
1 2 1 2
3.44998754583159 3.45394794704768 3.46182200347859 3.46963454321538
3 1 4 3
3.47351804324178 3.47609868983527 3.4773865200197 3.48124008933569
2 1 1 1
3.48890296208126 3.49271249049793 3.50028828430639 3.50780711672041
3 1 2 2
3.51154543883102 3.51402991215868 3.515269837922 3.51774508671055
3 1 2 1
3.51898041731854 3.52267727919986 3.52513428289292 3.52636052461616
4 6 1 1
3.53003025350512 3.53368656470823 3.53732955598674 3.54095932403731
1 1 5 1
3.5445759645075 3.5469798118189 3.5481795720108 3.55177024014153
2 2 1 1
3.55534806148941 3.55891312765391 3.56128279700923 3.56246552925828
3 4 1 2
3.56482680544396 3.5660053559634 3.56953269648137 3.57304763858881
1 2 2 1
3.57655026914002 3.58583107821449 3.5869851464326 3.58928929491745
1 1 2 1
3.59043938130068 3.59388172549166 3.60073106733723 3.60413822565885
2 1 7 1
3.60753381465998 3.60979115196163 3.61316763237824 3.61429059712286
2 1 1 1
3.62322920412367 3.62434093297637 3.62766872306904 3.63429126382953
1 1 4 1
3.63648906691201 3.64414356027254 3.64740620590736 3.64957540415491
1 1 2 4
3.65065824129374 3.65282040429823 3.65389973521791 3.65713075579936
2 1 1 1
3.65927898433765 3.6603513704994 3.66249269894074 3.66356164612965
1 2 1 1
3.66676164886032 3.66888930923743 3.66995144422842 3.6731310971458
1 1 1 1
3.67630067190708 3.67840815424664 3.67946023219744 3.68260984110034
2 1 2 2
3.68574956110501 3.69096062031776 3.69199958145018 3.69407427099104
1 1 1 1
3.69717825692863 3.69821078154282 3.70130197411249 3.70335747329459
2 1 1 1
3.7043836406499 3.70643282169484 3.70745583968687 3.71051862921742
1 1 3 2
3.71357206670431 3.71661620908554 3.71762886739992 3.71864050127477
2 3 1 1
3.71965111278069 3.72468890681065 3.72569342723665 3.72769944596352
1 1 1 1
3.73070094896727 3.73169945129686 3.73369346990373 3.73667706237062
3 2 2 1
3.73766961828337 3.73965177948736 3.74064138867253 3.74261767390074
1 3 3 2
3.74360435380318 3.74557479779048 3.74852320287478 3.74950407593037
1 1 3 1
3.75439406122456 3.75536919538277 3.7582889054861 3.76023065366901
1 9 4 1
3.76120011569356 3.76410287535152 3.76699723337789 3.76892216178747
1 2 2 1
3.76988323826702 3.77276093809464 3.77563038052259 3.77753877804835
2 2 1 1
3.77849161280362 3.78324221556222 3.78418963391826 3.78702651525346
4 2 3 2
3.78797035675817 3.78891330826604 3.78985537145394 3.79173683955364
1 2 1 2
3.79267624779558 3.79455242095381 3.7982942400998 3.80015991228275
2 2 1 1
3.80295191037378 3.81128143562661 3.81220267014594 3.81404259706794
1 1 2 1
3.81496129258501 3.81679615548513 3.81862765782859 3.81954215263398
4 1 1 3
3.82228062992728 3.82592030637473 3.83945231259331 3.84034796872126
2 1 2 1
3.8412428233671 3.84213687796398 3.84392259272421 3.8448142557347
3 1 2 2
3.84659520010569 3.84926068369183 3.85103373380172 3.85280364576817
3 1 1 1
3.85457043068006 3.85545265393975 3.85985213309924 3.8607297110406
1 2 2 3
3.86248255986801 3.8642323415918 3.86510608564039 3.86772274653157
1 1 1 2
3.86859344750081 3.87033257837394 3.87120101090789 3.87380179260795
3 1 2 1
3.87553189684573 3.87639582778499 3.87725901299181 3.87812145375246
6 1 1 1
3.88070432217072 3.88156379794344 3.88242253565186 3.88328053656249
2 5 1 1
3.88841313978901 3.89096959623031 3.89351953386359 3.89436807018943
5 2 1 2
3.89606298584942 3.8969093676181 3.89944422322129 3.90113056426172
1 2 3 4
3.90197266957464 3.90449473900735 3.90617259174997 3.90951987521003
1 2 2 1
3.91118932467957 3.91368828474721 3.91535079552082 3.91618101557681
1 3 1 1
3.91783939074959 3.91866754814681 3.91949502026685 3.92114791320515
1 1 3 1
3.9236221412715 3.9244455254267 3.92609026263958 3.92773229913333
1 1 1 2
3.92855230737936 3.92937164376276 3.93019030938359 3.93100830533923
1 1 2 2
3.93264229263088 3.93345828614821 3.93590227921809 3.93671561801852
1 1 2 3
3.93834031374552 3.9391516728164 3.94077241871413 3.94561895485666
2 1 2 4
3.94642443214548 3.94722926116277 3.94803344295118 3.94963986899945
2 4 3 1
3.95124371858143 3.95204467977763 3.9528449999484 3.95364468011897
1 1 2 1
3.9544437213121 3.95524212454812 3.95603989084492 3.9576335166802
1 1 1 1
3.9584293782423 3.9600192036964 3.96239921275321 3.96319129200255
1 2 3 1
3.96398274435886 3.96477357081368 3.96556377235618 3.96635334997319
1 1 2 3
3.96793063736644 3.96950544084151 3.97029191355212 3.97107776820946
2 1 1 4
3.97186300578416 3.97343163355679 3.97421502568459 3.97499780458953
3 1 4 1
3.97656152656572 3.97890253426769 3.97968165390196 3.98046016698137
1 2 4 1
3.98123807444962 3.98356817259124 3.98434366700777 3.9851185604987
4 1 1 1
3.9858928539946 3.98666654842391 3.98821214378569 3.99129618632265
2 1 2 1
3.99206571310168 3.99283464816456 3.9943707467769 3.99513791213865
1 2 4 1
3.99667047948843 3.99743588327628 3.99972858584725 4.00049165341575
3 3 2 1
4.00125413915609 4.00277736869661 4.00353811426392 4.00429828153732
1 1 2 3
4.00505787139534 4.00581688471451 4.00657532236937 4.00733318523247
2 3 1 2
4.00884719006369 4.01035890614901 4.01111390807238 4.01186834039786
3 1 1 2
4.01262220398426 4.01488039086785 4.01563198804717 4.020129746754
2 1 2 3
4.02087741034023 4.02162451534323 4.02237106259701 4.02386248718368
1 5 2 1
4.02535169073515 4.02609546168799 4.02683867985673 4.02832346112431
1 1 1 2
4.02906502585981 4.02980604108453 4.03054650761225 4.03128642625496
2 1 1 1
4.03202579782284 4.03350290296586 4.03497782948692 4.0357144777707
1 3 4 3
4.0364505838032 4.03718614838215 4.03792117230352 4.03865565636151
1 1 4 1
4.04012300805546 4.04085587727111 4.04158820978279 4.042320006376
2 1 2 3
4.04305126783455 4.0437819949405 4.04451218847422 4.04597097793788
1 3 2 2
4.04669957542003 4.04888218814534 4.05033462122566 4.05106004744536
2 5 2 2
4.05250932306135 4.05323317397967 4.05467930582967 4.05540158827349
3 1 1 2
4.05684458996689 4.0575653107188 4.05900519577679 4.0597243615755
2 1 1 1
4.06044301054642 4.06187876097252 4.06259586390752 4.06331245297437
3 3 1 5
4.06545914431754 4.0661736852554 4.06688771598906 4.06760123724659
2 2 1 1
4.06831424975451 4.0697387514199 4.07045024202266 4.07187170637004
1 2 1 1
4.07258168155073 4.07329115302427 4.07400012150487 4.07470858770524
2 1 5 5
4.07541655233658 4.07612401610857 4.07683097972939 4.07753744390572
4 2 2 1
4.07824340934273 4.07894887674413 4.08176578001524 4.08246876774191
1 1 1 2
4.08317126162398 4.08527578712889 4.08597631255158 4.08667634758192
1 2 1 4
4.08737589290601 4.08877351717264 4.09086629784578 4.09156291926022
2 3 4 3
4.09295470793305 4.09364987653942 4.0943445622221 4.09573248749695
1 1 1 2
4.09711848910483 4.09781077019859 4.09919389628354 4.10057511197274
1 1 1 2
4.10195442253624 4.1026433650368 4.10333183322234 4.10401982774552
2 4 1 1
4.10539439840869 4.10676708222066 4.10745271817484 4.10813788435444
2 1 1 2
4.10882258140275 4.11019057067218 4.11155669110322 4.11223905209865
2 1 1 2
4.11292094779504 4.11360237882652 4.11428334582593 4.11496384942484
1 3 1 1
4.11564389025349 4.11632346894088 4.11768124240134 4.11835943842597
2 1 1 3
4.11971445218343 4.1203912711602 4.12174353641022 4.12241898391985
5 2 4 2
4.12376851178999 4.12511622088885 4.12578939492976 4.12646211611221
4 3 4 2
4.12713438504509 4.12780620233606 4.12847756859156 4.12914848441679
3 3 2 2
4.12981895041576 4.13048896719125 4.13115853534482 4.13182765547684
3 1 1 2
4.13316455407168 4.13383233372922 4.13516655674236 4.13583300128552
4 1 2 4
4.13649900197613 4.13716455940503 4.13782967416184 4.13982236827855
2 4 3 1
4.14048571821996 4.1411486284199 4.14181109946102 4.14313472639153
1 2 3 1
4.14379588344041 4.14445660364945 4.14577673585437 4.14643614900059
2 3 6 1
4.14709512760763 4.14906946191135 4.14972670807369 4.15038352254722
1 3 1 1
4.15103990589865 4.15169585869357 4.15235138149646 4.15300647487069
1 1 1 3
4.15366113937852 4.15496918403854 4.15562256530974 4.15692804852387
2 3 2 1
4.15758015157926 4.15888308335967 4.15953391319065 4.16018431971764
3 1 4 1
4.16148386505973 4.16213300497217 4.16278172377533 4.16343002201522
4 1 4 2
4.1647253589839 4.16537239879942 4.16601902022512 4.16666522380173
2 1 1 2
4.16731101006892 4.16860133282859 4.16924587039522 4.17053370057965
3 3 4 3
4.17117699426539 4.17310439608275 4.17374603870983 4.17438726989564
1 4 1 2
4.17502809016749 4.17566850005169 4.17630850007353 4.17694809075731
3 1 1 5
4.17758727262631 4.1782260462028 4.17886441200807 4.17950237056241
1 2 3 2
4.18013992238509 4.18077706799441 4.18141380790768 4.18205014264121
3 6 2 5
4.1833215986294 4.18395672091179 4.18459144006988 4.18585967105787
1 8 5 3
4.1864931839077 4.18712629567307 4.18775900686153 4.18839131797965
2 2 5 5
4.18965474202643 4.19028585596344 4.19154689017846 4.19217681145914
4 4 1 6
4.19343546486633 4.19406419798984 4.1953204795621 4.19594802900221
5 2 2 3
4.196575184871 4.19720194766181 4.19782831786707 4.19845429597827
1 6 1 1
4.19907988248601 4.19970507787993 4.20032988264877 4.20095429728036
2 2 3 4
4.20157832226161 4.20220195807851 4.20282520521617 4.20344806415876
3 1 4 3
4.20407053538957 4.20469261939097 4.20531431664444 4.20655655282903
1 4 3 4
4.20717709271863 4.20841701848195 4.20903640530881 4.21027402922916
4 1 2 3
4.21089226727049 4.21212759787848 4.21274469138773 4.21336140432741
4 3 1 1
4.21397773716665 4.21459369037368 4.21582445975981 4.21643927687109
6 4 3 1
4.21705371621454 4.2176677782541 4.21828146345286 4.21950770517611
2 5 2 4
4.22012026262252 4.22134425298341 4.22195568681475 4.22317743406507
3 4 3 2
4.22378774839588 4.22500726074214 4.22561645966443 4.22683374526818
6 3 6 3
4.22744183285153 4.22804955088907 4.22865689982969 4.22926388012147
3 1 3 6
4.23047673654668 4.23108261357218 4.23229326747308 4.23289804523569
5 3 2 3
4.23410650459726 4.23471018707862 4.2359164598425 4.23651905100264
5 2 4 1
4.23772314506745 4.23832464884498 4.2395265720666 4.24012699237884
11 5 9 2
4.24132675257075 4.24192609331389 4.24252507506286 4.24312369824745
3 4 1 7
4.2437219632967 4.24491742070147 4.24551461391122 4.24670793147526
6 5 2 7
4.24730405667921 4.24849524204936 4.24909030306067 4.24968501018495
2 4 1 1
4.25027936384286 4.25087336445433 4.25206030821386 4.25265325219802
5 5 5 3
4.25324584480796 4.25383808645985 4.25442997756917 4.25561270981822
1 6 4 4
4.25620355178519 4.25738418946661 4.25915253652335 4.25974129132399
3 6 8 1
4.26091776204792 4.26150547878537 4.26267987704132 4.26385289770368
14 1 3 1
4.26443889244649 4.26502454400057 4.26619481914876 4.26794766797617
6 1 11 13
4.26853126880978 4.26969744969996 4.27028003054953 4.2714441750349
6 8 3 7
4.27202573945955 4.27318785463973 4.27376840617998 4.27492849911751
3 8 4 7
4.27550804129543 4.27666611901606 4.27840072482826 4.27897825877443
3 11 7 3
4.28013232699254 4.28070886203301 4.28186093589316 4.28243647547739
17 3 9 3
4.28358656186063 4.28416110942024 4.28530921517208 4.28588277412098
8 4 7 2
4.2870289060516 4.28874564467066 4.28931723656961 4.29045944114839
6 12 1 9
4.29103005457329 4.29217030555202 4.29273994384712 4.29387824789718
5 16 2 7
4.29558327814826 4.29615097614818 4.29728540621879 4.29898464197175
18 7 13 9
4.29955041284964 4.30068099521993 4.30124580743489 4.30237447572626
4 14 4 9
4.30293833252158 4.30406509320417 4.30462799780671 4.30575285731789
4 13 2 10
4.30631481293818 4.30743777768281 4.30911986386579 4.3096799310885
3 15 20 1
4.31079912538551 4.31247557171277 4.31303376318693 4.3141492122708
12 19 5 13
4.31470647057443 4.31582005643561 4.31637638468362 4.31748811353631
1 10 2 19
4.31804351482801 4.31915339285537 4.31970787027462 4.32081590362899
1 8 1 17
4.32247565504735 4.32302829391193 4.32413265625498 4.32578691635101
28 2 16 16
4.32633772881329 4.32743844438948 4.32798834817018 4.32908724937966
5 19 1 20
4.32963624747196 4.33073334028633 4.33128143566865 4.33237672603006
6 14 5 14
4.33292392166615 4.33401741548752 4.33565541749176 4.33729074083249
1 18 15 13
4.33783525486718 4.33892339425638 4.33946702025509 4.34055338646731
1 20 1 12
4.34218072612668 4.34272258471485 4.34380542185368 4.34542748222555
17 3 24 10
4.34596758485818 4.34704691577786 4.34758614469359 4.34866373100476
1 19 2 24
4.34920208902584 4.3502779363593 4.35188954025364 4.35349855105934
2 16 34 20
4.35510497710762 4.35563987950069 4.35670882668959 4.35831010805657
31 3 26 9
4.35884329921822 4.35990882942026 4.36150499895308 4.36203648979738
5 18 14 2
4.36309862478836 4.3646897150206 4.36627827770574 4.36680723831051
29 27 33 1
4.36786432086138 4.36839244339808 4.36944785246702 4.36997513958707
37 2 32 1
4.37102888046434 4.37155533480659 4.37260741275739 4.37418345721286
16 1 39 17
4.3747082538662 4.37575702166029 4.3762809933778 4.37732811389233
5 43 1 22
4.3778512632634 4.37889674166495 4.3804629126977 4.38202663467388
1 57 46 37
4.38358791524083 4.38410780087771 4.38514676201012 4.38670318255778
35 1 31 10
4.38722145155099 4.38825718442452 4.38980877511594 4.39032543748858
3 52 10 2
4.39135796210277 4.39187382489471 4.39290475282106 4.39444915467244
41 1 36 55
4.39599117502425 4.39753082120985 4.39906810052873 4.39958000225478
57 36 18 1
4.40060302024682 4.40213558759659 4.40366580977736 4.40519369395542
35 10 67 14
4.40671924726425 4.40824247680477 4.40874970481464 4.40976338964548
95 46 1 47
4.41128199282267 4.41178768183471 4.41279829334063 4.41431229817185
38 1 40 6
4.41481645749687 4.41582401425717 4.41733344850603 4.4188406077966
1 71 9 127
4.42034549897602 4.42184812886055 4.42334850423579 4.42384812952722
28 76 37 1
4.42484663185681 4.42634251844839 4.42783617070518 4.42932759529185
39 16 64 5
4.43081679884331 4.43230378796489 4.43378856923247 4.43527114919269
129 17 152 26
4.43675153436313 4.43822973123244 4.43970574626056 4.44117958587886
36 15 43 5
4.44265125649032 4.44412076446968 4.44461012097565 4.44558811616363
107 3 1 195
4.44705331789095 4.44851637594271 4.44997729658239 4.45143608604605
15 119 22 37
4.45289275054251 4.45434729625351 4.45579972933382 4.45725005591147
5 57 2 186
4.45869828208783 4.45918055844153 4.46014441393783 4.46158845751007
3 1 233 14
4.46303041882697 4.46447030388496 4.46590811865458 4.46734386908069
55 8 24 2
4.46877756108254 4.47020920055397 4.47163879336357 4.47306634535475
84 1 275 3
4.47449186234598 4.47591535013083 4.47733681447821 4.47875626113243
136 6 33 6
4.48017369581341 4.48300255201388 4.48582342835553 4.48723088812341
39 162 238 7
4.48863636973214 4.49003987873446 4.49144142065975 4.49423862528081
61 4 21 64
4.49702802736839 4.49841981604121 4.49980967033027 4.50119759560511
201 2 110 5
4.50258359721299 4.50534985070588 4.50810847314496 4.51085950651685
14 21 102 161
4.51223219032882 4.5136029924626 4.51497191806994 4.51633897228148
4 48 1 4
4.51906748693468 4.52178857704904 4.52450228292064 4.52585637926837
38 116 89 1
4.52720864451838 4.52990770148754 4.53259949315326 4.53528405852393
11 2 63 88
4.53796143629464 4.53929744183738 4.54063166485052 4.54329478227
29 1 1 1
4.54595082632812 4.5485998344997 4.55124184396254 4.55912624748668
80 48 1 118
4.56174062806076 4.56434819146784 4.56954300834494 4.57213033190989
5 1 4 18
4.5798523780038 4.58241319886548 4.58496747867057 4.59259140378123
1 2 2 1
4.59511985013459 4.60517018598809
1 4
Code
SF_mh_preF_factor_log<- lm (log (SF_mh) ~ Age + Weight + Height + as.factor (Intervention_months_factor),
data = data_scale,
subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 0 )
plot (SF_mh_preF_factor_log, 2 )
Code
shapiro.test (resid (SF_mh_preF_factor_log))
Shapiro-Wilk normality test
data: resid(SF_mh_preF_factor_log)
W = 0.70988, p-value < 2.2e-16
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see chunk below.
#multicollinearity
car:: vif (SF_mh_preF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.066108 1 1.032525
Weight 1.207108 1 1.098685
Height 1.140604 1 1.067991
as.factor(Intervention_months_factor) 1.003858 4 1.000481
Postmenopausal females:
Code
#### FEMALE post
### LINEARITY (Linearity and constant variance assumptions)
#Fitted versus Residuals Plot: Linearity bad, constant variance looks bad (heteroscedasticity)
plot (SF_mh_postF, which = 1 )
Code
### HOMOGEINITY OF VARIANCE
# Display scale-location plot, also known as the spread-location plot
plot (SF_mh_postF, which = 3 ) # Linearity bad, constant variance bad
Code
# formal test in the form of Breusch-Pagan
bptest (SF_mh_postF) # reject the null of homoscedasticity. The constant variance assumption is violated.
studentized Breusch-Pagan test
data: SF_mh_postF
BP = 22.92, df = 7, p-value = 0.00176
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot (SF_mh_postF, 2 ) #suspect
Code
# formal test in the form of Shapiro-Wilk
shapiro.test (resid (SF_mh_postF)) #reject for the data sampled from normal
Shapiro-Wilk normality test
data: resid(SF_mh_postF)
W = 0.80728, p-value < 2.2e-16
Code
### OUTLIERS
#plot 3largest
plot (SF_mh_postF, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (SF_mh_postF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 13
.rownames SF_mh Age[,1] Weight…¹ Heigh…² as.fa…³ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 3296 13.8 11.0 19.8 14.2 0 80.2 -66.5 0.0102 11.8
2 5278 29.7 5.98 36.8 4.24 2 76.7 -47.0 0.0129 11.9
3 6670 22.6 3.98 1.81 -9.76 4 77.8 -55.2 0.00898 11.9
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹Weight[,1], ²Height[,1],
# ³`as.factor(Intervention_months_factor)`
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = SF_mh), almha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 4 ) # some influential data points
# A tibble: 9 × 13
.rownames SF_mh Age[,1] Weight…¹ Heigh…² as.fa…³ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 514 27.1 7.98 -6.19 6.24 0 80.9 -53.7 0.00520 11.9
2 1423 25.8 4.98 -13.2 -6.76 2 79.7 -53.9 0.00491 11.9
3 1438 19.2 8.98 11.8 -5.76 2 78.7 -59.4 0.00516 11.9
4 1689 27.2 3.98 -4.19 -1.76 0 78.8 -51.6 0.00317 11.9
5 3296 13.8 11.0 19.8 14.2 0 80.2 -66.5 0.0102 11.8
6 3954 33.1 17.0 -2.19 -16.8 2 80.9 -47.8 0.00554 11.9
7 4753 28.2 4.98 8.81 -8.76 0 77.2 -49.0 0.00407 11.9
8 5844 31.5 11.0 -12.2 -17.8 0 79.8 -48.3 0.00355 11.9
9 6670 22.6 3.98 1.81 -9.76 4 77.8 -55.2 0.00898 11.9
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹Weight[,1], ²Height[,1],
# ³`as.factor(Intervention_months_factor)`
Code
#Cook's distance
cooksd = cooks.distance (SF_mh_postF)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
Code
# log transform to pass normality.
SF_mh_postF_factor_fix <- lm (log1p (SF_mh) ~ log1p (Age) + log1p (Weight) + log1p (Height) + Intervention_months_factor,
data = data_scale,
subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 1 )
qqnorm (resid (SF_mh_postF_factor_fix), col = "grey" )
qqline (resid (SF_mh_postF_factor_fix), col = "dodgerblue" , lwd = 2 )
Code
shapiro.test (resid (SF_mh_postF_factor_fix))
Shapiro-Wilk normality test
data: resid(SF_mh_postF_factor_fix)
W = 0.65258, p-value = 1.188e-15
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see chunk below.
#multicollinearity
car:: vif (SF_mh_postF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.035224 1 1.017459
Weight 1.147491 1 1.071210
Height 1.172199 1 1.082681
as.factor(Intervention_months_factor) 1.011478 4 1.001428
Males:
Code
### MALE
### LINEARITY (Lnearity and constant variance assumptions)
#Fitted versus Residuals Plot: Good
plot (SF_mh_M, which = 1 )
Code
### HOMOGEINITY OF VARIANCE
# Display scale-location plot, also known as the spread-location plot
plot (SF_mh_M, which = 3 ) # suspect
Code
# formal test in the form of Breusch-Pagan
bptest (SF_mh_M)# small p-value, so we reject the null of homoscedasticity. The constant variance assumption is violated.
studentized Breusch-Pagan test
data: SF_mh_M
BP = 24.332, df = 7, p-value = 0.0009959
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot (SF_mh_M, 2 ) # suspect
Code
# formal test in the form of Shapiro-Wilk
shapiro.test (resid (SF_mh_M)) # reject for the data sampled from normal
Shapiro-Wilk normality test
data: resid(SF_mh_M)
W = 0.7886, p-value < 2.2e-16
Code
### OUTLIERS
#plot 3largest
plot (SF_mh_M, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (SF_mh_M) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 13
.rownames SF_mh Age[,1] Weight…¹ Heigh…² as.fa…³ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 3057 35.1 13.0 71.8 2.24 4 81.2 -46.0 0.0117 11.9
2 4049 19.5 -22.0 56.8 5.24 1 76.0 -56.5 0.00963 11.9
3 6804 3 -2.02 29.8 5.24 4 80.0 -77.0 0.00331 11.9
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹Weight[,1], ²Height[,1],
# ³`as.factor(Intervention_months_factor)`
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = SF_mh), almha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 ) # some influential data points
# A tibble: 90 × 13
.rownames SF_mh Age[,1] Weigh…¹ Heigh…² as.fa…³ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 423 39.6 -9.02 -18.2 3.24 2 79.8 -40.2 3.02e-3 11.9
2 500 39.2 -4.02 -2.19 18.2 0 82.0 -42.8 1.94e-3 11.9
3 501 42.5 -14.0 1.81 3.24 0 79.1 -36.7 1.10e-3 11.9
4 581 38.5 -12.0 -7.19 1.24 0 79.5 -41.1 1.23e-3 11.9
5 662 31 24.0 16.8 1.24 0 84.9 -53.9 1.38e-3 11.9
6 759 38.2 12.0 -6.19 3.24 0 83.7 -45.4 1.02e-3 11.9
7 856 42.8 7.98 -6.19 3.24 0 83.0 -40.3 9.35e-4 11.9
8 897 31.9 -9.02 66.8 23.2 2 79.1 -47.2 8.56e-3 11.9
9 959 37.6 -5.02 13.8 8.24 0 80.7 -43.1 8.20e-4 11.9
10 974 41.2 15.0 17.8 7.24 0 83.8 -42.6 8.88e-4 11.9
# … with 80 more rows, 3 more variables: .cooksd <dbl>, .std.resid <dbl>,
# index <int>, and abbreviated variable names ¹Weight[,1], ²Height[,1],
# ³`as.factor(Intervention_months_factor)`
Code
#Cook's distance
cooksd = cooks.distance (SF_mh_M)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# log transform to pass normality.
SF_mh_Mfactor_fix <- lm (SF_mh~ Age + Weight + Height + as.factor (Intervention_months_factor),
data = data_scale2,
subset = data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 1 )
qqnorm (resid (SF_mh_Mfactor_fix), col = "grey" )
qqline (resid (SF_mh_Mfactor_fix), col = "dodgerblue" , lwd = 2 )
Code
shapiro.test (resid (SF_mh_Mfactor_fix))
Shapiro-Wilk normality test
data: resid(SF_mh_Mfactor_fix)
W = 0.68271, p-value < 2.2e-16
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see chunk below.
#multicollinearity
car:: vif (SF_mh_M) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.093016 1 1.045474
Weight 1.274398 1 1.128892
Height 1.251832 1 1.118853
as.factor(Intervention_months_factor) 1.007197 4 1.000897
We conclude that the assumptions were violated and thus we use robust models.
Robust models
Code
SF_mh_preF_robust <- lmrob (SF_mh ~ Age + Weight + Height + as.factor (Intervention_months_factor) , data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 0 )
SF_mh_postF_robust <- lmrob (SF_mh ~ Age + Weight + Height + as.factor (Intervention_months_factor) , data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 1 )
SF_mh_Mrobust <- lmrob (SF_mh ~ Age + Weight + Height + as.factor (Intervention_months_factor), data = data_scale, subset = data_scale$ Gender == "Male" )
pl <- c (
` (Intercept) ` = "Intercept" ,
` as.factor(Intervention_months_factor)1 ` = "6-12 months" ,
` as.factor(Intervention_months_factor)2 ` = "12-18 months" ,
` as.factor(Intervention_months_factor)3 ` = "18-24 months" ,
` as.factor(Intervention_months_factor)4 ` = "24+ months"
)
tab_model (SF_mh_preF_robust, SF_mh_postF_robust, SF_mh_Mrobust, pred.labels = pl, dv.labels = c ("Pre-menopausal women" , "Post-menopausal women" , "Men" ), title = "Robust: Mental health (Sf-36), higher score indicates better health" , show.reflvl = T)
Robust: Mental health (Sf-36), higher score indicates better health
Pre-menopausal women
Post-menopausal women
Men
Predictors
Estimates
CI
p
Estimates
CI
p
Estimates
CI
p
Intercept
83.37
82.56 – 84.18
<0.001
82.70
81.68 – 83.72
<0.001
84.61
84.16 – 85.06
<0.001
Age
0.13
0.08 – 0.18
<0.001
0.13
0.07 – 0.18
<0.001
0.11
0.09 – 0.13
<0.001
Weight
-0.00
-0.04 – 0.03
0.896
-0.05
-0.08 – -0.01
0.014
-0.02
-0.04 – 0.01
0.140
Height
0.02
-0.05 – 0.08
0.591
0.05
-0.01 – 0.12
0.104
0.05
0.01 – 0.09
0.008
6-12 months
-1.25
-2.25 – -0.24
0.016
-0.25
-1.29 – 0.79
0.638
0.10
-0.56 – 0.75
0.771
12-18 months
-0.04
-1.01 – 0.92
0.929
0.30
-0.75 – 1.34
0.579
-0.46
-1.13 – 0.22
0.184
18-24 months
-1.29
-2.90 – 0.32
0.117
-1.37
-3.16 – 0.42
0.133
-0.16
-1.10 – 0.79
0.745
24+ months
-0.98
-2.14 – 0.18
0.098
0.06
-1.13 – 1.24
0.925
-0.34
-1.02 – 0.34
0.330
Observations
2404
1476
3595
R2 / R2 adjusted
0.019 / 0.016
0.022 / 0.017
0.058 / 0.056
Warm glow
Warm glow is defined by a score of >= 6 on average on warm glow questions (3 questions).
Predict by ferritin
Code
# Barplot
data_scale_per2 <- data_scale %>%
group_by (Fergroup) %>%
count (Warmglow) %>%
mutate (
Percentage = round (proportions (n) * 100 , 1 ),
res = str_c (n, "(" , Percentage, ")%" )
)
ggplot (subset (data_scale_per2, ! is.na (Warmglow)), aes (Fergroup, Percentage, fill = Warmglow)) +
geom_col (position = "dodge" ) +
geom_text (aes (label = res), vjust = - 0.2 )+
theme_bw ()
Code
ggplot (data_scale , mapping = aes (x = Fergroup, fill = Warmglow)) +
geom_bar (position = "dodge" ) +
theme_bw ()
Code
# t-test + boxplot
res <- t.test (Ferritin ~ Warmglow, data = data_scale, var.equal = TRUE )
res
Two Sample t-test
data: Ferritin by Warmglow
t = -0.48007, df = 6602, p-value = 0.6312
alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
95 percent confidence interval:
-2.511109 1.523153
sample estimates:
mean in group No mean in group Yes
37.39408 37.88806
Code
ggplot (subset (data_scale, ! is.na (Warmglow)), aes (x= Warmglow, y= Ferritin)) +
geom_boxplot () +
theme_bw () +
geom_text (x = 2.3 , y = 700 , label= paste ('Means:' , " " , means$ Ferritin[1 ], " " , means$ Ferritin[2 ], " \n " , 't(' ,res$ parameter, '), p = ' , round (res$ p.value, 3 ), sep = '' )) +
labs (x= "Warm glow" , y= "Ferritin" )
Models
Code
WG_preF <- glm (Warmglow ~ Age + Weight + Height + Intervention_months_factor , data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 0 ,family = binomial)
WG_postF <- glm (Warmglow ~ Age + Weight + Height + Intervention_months_factor , data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 1 ,family = binomial)
WG_M <- glm (Warmglow ~ Age + Weight + Height + Intervention_months_factor , data = data_scale, subset = data_scale$ Gender == "Male" ,family = binomial)
pl <- c (
` (Intercept) ` = "Intercept" ,
` as.factor(Intervention_months_factor)1 ` = "6-12 months" ,
` as.factor(Intervention_months_factor)2 ` = "12-18 months" ,
` as.factor(Intervention_months_factor)3 ` = "18-24 months" ,
` as.factor(Intervention_months_factor)4 ` = "24+ months"
)
tab_model (WG_preF, WG_postF, WG_M, pred.labels = pl, dv.labels = c ("Pre-menopausal women" , "Post-menopausal women" , "Men" ), title = "Warmglow, yes/no" , show.reflvl = T)
Warmglow, yes/no
Pre-menopausal women
Post-menopausal women
Men
Predictors
Odds Ratios
CI
p
Odds Ratios
CI
p
Odds Ratios
CI
p
Intercept
1.57
1.27 – 1.95
<0.001
1.58
1.18 – 2.11
0.002
1.53
1.35 – 1.73
<0.001
Age
1.03
1.02 – 1.04
<0.001
1.01
1.00 – 1.03
0.108
1.02
1.02 – 1.03
<0.001
Weight
1.00
0.99 – 1.01
0.954
0.99
0.98 – 1.00
0.265
1.01
1.00 – 1.01
0.030
Height
1.00
0.99 – 1.02
0.596
0.99
0.97 – 1.01
0.320
1.00
0.99 – 1.01
0.761
Intervention_months_factor1
0.99
0.77 – 1.26
0.911
0.86
0.63 – 1.18
0.355
0.83
0.67 – 1.02
0.078
Intervention_months_factor2
1.41
1.11 – 1.80
0.005
0.80
0.60 – 1.07
0.135
0.97
0.79 – 1.19
0.767
Intervention_months_factor3
1.03
0.75 – 1.40
0.866
0.43
0.27 – 0.68
<0.001
0.72
0.55 – 0.95
0.019
Intervention_months_factor4
1.06
0.81 – 1.39
0.655
0.90
0.62 – 1.34
0.605
0.81
0.65 – 1.00
0.053
Observations
2167
1403
3386
R2 Tjur
0.016
0.014
0.030
Assumptions
Premenopausal females:
Code
### OUTLIERS
#Cook's distance
#plot 3largest
plot (WG_preF, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (WG_preF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals not above 3, does not deserve closer attention
# A tibble: 3 × 13
.rowna…¹ Warmg…² Age[,1] Weigh…³ Heigh…⁴ Inter…⁵ .fitted .resid .std.…⁶ .hat
<chr> <fct> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 275 No -6.02 49.8 5.24 2 0.637 -1.46 -1.47 0.0125
2 4031 No 0.985 36.8 1.24 3 0.503 -1.40 -1.41 0.0118
3 5873 Yes -24.0 48.8 -4.76 1 -0.266 1.29 1.30 0.0179
# … with 3 more variables: .sigma <dbl>, .cooksd <dbl>, index <int>, and
# abbreviated variable names ¹.rownames, ²Warmglow, ³Weight[,1], ⁴Height[,1],
# ⁵Intervention_months_factor, ⁶.std.resid
Code
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
cooksd = cooks.distance (WG_preF)
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# infleunce plot
car:: influencePlot (WG_preF,col= "red" ,id.n= 3 )
StudRes Hat CookD
275 -1.465201 0.012464664 0.003020593
339 1.203187 0.015112249 0.002036705
386 -1.550097 0.004144052 0.001205968
405 -1.552705 0.003995963 0.001169521
5873 1.301589 0.017863410 0.003021551
Code
car:: influenceIndexPlot (WG_preF, id.n= 3 )
Code
#multicollinearity
car:: vif (WG_preF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.066877 1 1.032897
Weight 1.207036 1 1.098652
Height 1.139041 1 1.067259
Intervention_months_factor 1.002697 4 1.000337
Postmenopausal females:
Code
### OUTLIERS
#Cook's distance
#plot 3largest
plot (WG_postF, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (WG_postF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals not above 3, does not deserve closer attention
# A tibble: 3 × 13
.rowna…¹ Warmg…² Age[,1] Weigh…³ Heigh…⁴ Inter…⁵ .fitted .resid .std.…⁶ .hat
<chr> <fct> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 1032 Yes 2.98 57.8 -2.76 1 0.0422 1.16 1.18 0.0300
2 2675 No 11.0 55.8 1.24 0 0.273 -1.30 -1.31 0.0239
3 5303 No 25.0 38.8 -7.76 2 0.425 -1.36 -1.38 0.0188
# … with 3 more variables: .sigma <dbl>, .cooksd <dbl>, index <int>, and
# abbreviated variable names ¹.rownames, ²Warmglow, ³Weight[,1], ⁴Height[,1],
# ⁵Intervention_months_factor, ⁶.std.resid
Code
#Cook's distance
cooksd = cooks.distance (WG_postF)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# infleunce plot
car:: influencePlot (WG_postF,col= "red" ,id.n= 3 )
StudRes Hat CookD
1032 1.1722690 0.030003491 0.003821410
2675 -1.3078974 0.023885123 0.004119193
4404 0.7155689 0.043294905 0.001676371
5494 -1.6515737 0.004852339 0.001766253
6518 -1.6384692 0.004893419 0.001730525
Code
car:: influenceIndexPlot (WG_postF, id.n= 3 )
Code
#multicollinearity
car:: vif (WG_postF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.036025 1 1.017853
Weight 1.159329 1 1.076721
Height 1.180711 1 1.086605
Intervention_months_factor 1.018630 4 1.002310
Code
### OUTLIERS
#Cook's distance
#plot 3largest
plot (WG_M, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (WG_M) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals not above 3, does not deserve closer attention
# A tibble: 3 × 13
.rown…¹ Warmg…² Age[,1] Weigh…³ Heigh…⁴ Inter…⁵ .fitted .resid .std.…⁶ .hat
<chr> <fct> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 897 No -9.02 66.8 23.2 2 0.720 -1.49 -1.50 0.00889
2 1294 No -9.02 -3.19 -56.8 2 0.0864 -1.21 -1.23 0.0329
3 1510 No 25.0 41.8 -6.76 2 1.19 -1.71 -1.71 0.00557
# … with 3 more variables: .sigma <dbl>, .cooksd <dbl>, index <int>, and
# abbreviated variable names ¹.rownames, ²Warmglow, ³Weight[,1], ⁴Height[,1],
# ⁵Intervention_months_factor, ⁶.std.resid
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = Warmglow), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 ) # see zero influential data points
# A tibble: 0 × 13
# … with 13 variables: .rownames <chr>, Warmglow <fct>, Age <dbl[,1]>,
# Weight <dbl[,1]>, Height <dbl[,1]>, Intervention_months_factor <fct>,
# .fitted <dbl>, .resid <dbl>, .std.resid <dbl>, .hat <dbl>, .sigma <dbl>,
# .cooksd <dbl>, index <int>
Code
#Cook's distance
cooksd = cooks.distance (WG_M)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# infleunce plot
car:: influencePlot (WG_M,col= "red" ,id.n= 3 )
StudRes Hat CookD
897 -1.500435 0.008894675 0.0023248247
1294 -1.229454 0.032856980 0.0047869881
1510 -1.712966 0.005574479 0.0023231165
5998 0.853050 0.011983516 0.0006676027
8023 -1.704294 0.002536407 0.0010373587
Code
car:: influenceIndexPlot (WG_M, id.n= 3 )
Code
#multicollinearity
car:: vif (WG_M) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.086787 1 1.042491
Weight 1.263387 1 1.124005
Height 1.249615 1 1.117862
Intervention_months_factor 1.005663 4 1.000706
Fatigue
Predict by ferritin
Code
res <- cor.test (data_scale$ CIS_Totalmean, data_scale$ Ferritin, method = "pearson" , use = "complete.obs" )
ggplot (data_scale, mapping = aes (x = CIS_Totalmean, y = Ferritin)) +
geom_point () +
theme_bw () +
geom_text (x = 120 , y = 1000 , label= paste ('r(' ,res$ parameter,') = ' , round (res$ estimate,3 ),', p = ' , round (res$ p.value, 3 ), sep = '' )) +
labs (x= "Fatigue, higher is worse" , y= "Ferritine" )
Code
# plot for cutoffs
data_scale <- data_scale %>% mutate (Fergroup = factor (case_when (Ferritin < 15 ~ 1 , Ferritin >= 15 & Ferritin <= 30 ~ 2 , Ferritin > 30 ~ 3 )) %>% fct_recode ("Ferritin < 15" = "1" , "Ferritin 15-30" = "2" , "Ferritin > 30" = "3" ))
ggplot (data_scale, aes (x= Fergroup, y= CIS_Totalmean)) +
geom_boxplot () +
theme_bw () +
labs (x= "Ferritin cut-off groups" , y= "Fatigue, higher is worse" )
Models
Code
CIS_preF <- lm (CIS_Totalmean ~ Age + Weight + Height + Intervention_months_factor, data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 0 )
CIS_postF <- lm (CIS_Totalmean ~ Age + Weight + Height + Intervention_months_factor, data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 1 )
CIS_M <- lm (CIS_Totalmean ~ Age + Weight + Height + Intervention_months_factor, data = data_scale, subset = data_scale$ Gender == "Male" )
pl <- c (
` (Intercept) ` = "Intercept" ,
` as.factor(Intervention_months_factor)1 ` = "6-12 months" ,
` as.factor(Intervention_months_factor)2 ` = "12-18 months" ,
` as.factor(Intervention_months_factor)3 ` = "18-24 months" ,
` as.factor(Intervention_months_factor)4 ` = "24+ months"
)
tab_model (CIS_preF, CIS_postF, CIS_M, pred.labels = pl, dv.labels = c ("Pre-menopausal women" , "Post-menopausal women" , "Men" ), title = "Fatigue, higher score is worse, show.reflvl = T, digits = 3" )
Fatigue, higher score is worse, show.reflvl = T, digits = 3
Pre-menopausal women
Post-menopausal women
Men
Predictors
Estimates
CI
p
Estimates
CI
p
Estimates
CI
p
Intercept
63.43
61.46 – 65.41
<0.001
58.63
55.40 – 61.86
<0.001
61.01
59.70 – 62.31
<0.001
Age
0.09
-0.02 – 0.19
0.112
-0.15
-0.33 – 0.04
0.118
-0.07
-0.13 – -0.02
0.004
Weight
0.07
-0.00 – 0.14
0.060
0.17
0.06 – 0.29
0.003
0.05
-0.01 – 0.12
0.125
Height
-0.12
-0.25 – 0.01
0.076
-0.22
-0.42 – -0.01
0.042
-0.13
-0.24 – -0.01
0.029
Intervention_months_factor1
5.29
3.05 – 7.53
<0.001
11.65
8.16 – 15.15
<0.001
7.12
4.91 – 9.33
<0.001
Intervention_months_factor2
-2.46
-4.69 – -0.23
0.030
-1.81
-5.10 – 1.49
0.282
1.14
-0.96 – 3.25
0.288
Intervention_months_factor3
19.43
16.64 – 22.22
<0.001
28.68
23.51 – 33.86
<0.001
24.05
21.11 – 26.98
<0.001
Intervention_months_factor4
19.36
16.95 – 21.77
<0.001
28.53
24.27 – 32.79
<0.001
24.89
22.63 – 27.15
<0.001
Observations
2360
1471
3558
R2 / R2 adjusted
0.159 / 0.157
0.175 / 0.171
0.161 / 0.159
Assumptions
Premenopausal females:
Code
### LINEARITY (Linearity and constant variance assumptions)
#Fitted versus Residuals Plot: Linearity good, constant variance (heteroscedasticity)
plot (CIS_preF, which = 1 )
Code
### HOMOGEINITY OF VARIANCE
# Display scale-location plot (also known as spread-location plot)
plot (CIS_preF, which = 3 ) # Linearity good, constant variance
Code
# formal test in the form of Breusch-Pagan
bptest (CIS_preF) # The constant variance assumption is not violated.
studentized Breusch-Pagan test
data: CIS_preF
BP = 427.45, df = 7, p-value < 2.2e-16
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot (CIS_preF, 2 ) #suspect
Code
# formal test in the form of Shapiro-Wilk
shapiro.test (resid (CIS_preF)) #fail to reject for the data sampled from normal
Shapiro-Wilk normality test
data: resid(CIS_preF)
W = 0.98128, p-value < 2.2e-16
Code
### OUTLIERS
#plot 3largest
plot (CIS_preF, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (CIS_preF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 13
.rownames CIS_T…¹ Age[,1] Weigh…² Heigh…³ Inter…⁴ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 275 107 -6.02 49.8 5.24 2 63.2 43.8 0.0122 19.3
2 339 26 -17.0 51.8 -1.76 0 65.7 -39.7 0.0133 19.3
3 5873 29 -24.0 48.8 -4.76 1 70.6 -41.6 0.0160 19.3
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹CIS_Totalmean, ²Weight[,1], ³Height[,1],
# ⁴Intervention_months_factor
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = CIS_Totalmean), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 ) # some influential data points
# A tibble: 2 × 13
.rowna…¹ CIS_T…² Age[,1] Weigh…³ Heigh…⁴ Inter…⁵ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 2612 123 -23.0 -8.19 3.24 0 60.5 62.5 0.00256 19.3
2 5073 122 -15.0 -4.19 0.240 2 59.4 62.6 0.00285 19.3
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹.rownames, ²CIS_Totalmean, ³Weight[,1],
# ⁴Height[,1], ⁵Intervention_months_factor
Code
#Cook's distance
cooksd = cooks.distance (CIS_preF)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations
#exceed this threshold
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# check log transform to pass normality.
data_scale2 <- data_scale
table (data_scale2$ CIS_Totalmean)
20 21 21.0526315789474 22
131 42 5 62
22.1052631578947 23 23.1578947368421 24
4 56 7 72
24.2105263157895 24.4444444444444 25 25.2631578947368
7 1 60 5
26 26.3157894736842 26.6666666666667 27
97 6 1 81
27.3684210526316 28 28.4210526315789 28.8888888888889
6 88 6 1
29 29.4736842105263 30 30.5263157894737
74 8 76 5
31 31.5789473684211 32 32.6315789473684
91 6 92 6
33 33.3333333333333 33.6842105263158 34
86 1 6 78
34.7368421052632 35 35.7894736842105 36
8 82 11 82
36.8421052631579 37 37.7777777777778 37.8947368421053
3 87 1 5
38 38.9473684210526 39 40
76 8 90 80
41 41.0526315789474 42 42.1052631578947
71 3 87 4
42.2222222222222 43 43.1578947368421 44
1 76 8 91
44.2105263157895 44.7058823529412 45 45.2631578947368
7 1 67 8
46 46.3157894736842 47 47.3684210526316
62 4 72 5
48 48.4210526315789 49 49.4736842105263
63 5 69 1
50 50.5263157894737 51 51.1111111111111
49 1 67 2
51.5789473684211 52 52.6315789473684 53
5 52 8 35
53.6842105263158 54 54.7368421052632 55
4 34 5 55
55.7894736842105 56 56.4705882352941 56.8421052631579
2 36 1 2
57 57.7777777777778 58 58.9473684210526
38 1 38 3
59 60 61 61.0526315789474
38 47 35 4
62 62.1052631578947 63 63.1578947368421
27 3 34 3
64 65 65.2631578947368 65.4545454545455
33 46 1 1
66 66.3157894736842 67 67.2727272727273
43 2 35 1
68 68.4210526315789 69 69.4736842105263
36 3 50 1
70 70.5263157894737 71 71.5789473684211
45 5 58 1
72 72.6315789473684 73 73.6842105263158
71 1 69 3
73.75 74 74.1176470588235 74.7368421052632
1 86 1 5
75 75.2941176470588 75.7894736842105 76
99 1 4 110
76.8421052631579 77 77.7777777777778 77.8947368421053
4 119 1 7
78 78.8235294117647 78.8888888888889 78.9473684210526
151 1 2 4
79 80 81 81.0526315789474
156 172 203 1
81.1111111111111 81.1764705882353 82 82.1052631578947
1 1 194 11
82.2222222222222 83 83.1578947368421 83.3333333333333
1 261 14 1
83.5294117647059 84 84.2105263157895 85
1 264 11 273
85.2631578947368 86 86.3157894736842 86.6666666666667
10 430 4 1
87 87.3684210526316 87.5 88
309 7 1 279
88.421052631579 88.8888888888889 89 89.4736842105263
6 1 207 7
90 90.5263157894737 91 91.578947368421
160 7 140 4
92 92.2222222222222 92.6315789473684 93
135 1 5 97
93.3333333333333 93.6842105263158 94 94.7368421052632
1 4 55 3
95 95.7894736842105 96 96.6666666666667
55 3 45 1
96.8421052631579 97 97.8947368421053 98
1 23 1 24
98.8888888888889 98.9473684210526 99 100
1 1 20 8
101 101.052631578947 102 102.105263157895
9 2 9 1
103 103.157894736842 104 105
5 1 5 2
106 107 108 109
2 9 2 4
109.473684210526 110 112 114
1 1 1 3
115 116 118 119
2 4 2 2
121 122 123 125
1 2 2 1
127 128 128.888888888889 131
1 2 1 1
Code
data_scale2$ CIS_Totalmean <- log10 (data_scale$ CIS_Totalmean)
table (data_scale2$ CIS_Totalmean)
1.30102999566398 1.32221929473392 1.32330639037513 1.34242268082221
131 42 5 62
1.34449568944507 1.36172783601759 1.36469907553336 1.38021124171161
4 56 7 72
1.38400423072875 1.38818017138288 1.39794000867204 1.40248763642276
7 1 60 5
1.41497334797082 1.42021640338319 1.42596873227228 1.43136376415899
97 6 1 81
1.43724974268197 1.44715803134222 1.45364015887014 1.46073083853149
6 88 6 1
1.46239799789896 1.46943442605337 1.47712125471966 1.48467439261011
74 8 76 5
1.49136169383427 1.49939764943081 1.50514997831991 1.51363808854542
91 6 92 6
1.51851393987789 1.52287874528034 1.52742637303106 1.53147891704226
86 1 6 78
1.54079033458904 1.54406804435028 1.55375531175341 1.55630250076729
8 82 11 82
1.56634443906143 1.56820172406699 1.57723640760293 1.57857889547844
3 87 1 5
1.57978359661681 1.59047811877815 1.5910646070265 1.60205999132796
76 8 90 80
1.61278385671974 1.61334100173765 1.6232492903979 1.62433638603911
71 3 87 4
1.62554108717749 1.63346845557959 1.63506025143089 1.64345267648619
1 76 8 91
1.64552568510905 1.65036467090252 1.65321251377534 1.65574485029074
7 1 67 8
1.66275783168157 1.66572907119734 1.67209785793572 1.6754889084865
62 4 72 5
1.68124123737559 1.68503422639273 1.69019608002851 1.69437425264687
63 5 69 1
1.69897000433602 1.70351763208674 1.70757017609794 1.70851532224225
49 1 67 2
1.71247247473967 1.7160033436348 1.72124639904717 1.72427586960079
5 52 8 35
1.72984657080909 1.73239375982297 1.73827973834595 1.74036268949424
4 34 5 55
1.74655226431194 1.7481880270062 1.75182231166129 1.75467015453412
2 36 1 2
1.75587485567249 1.76176083419547 1.76342799356294 1.77046442171735
38 1 38 3
1.77085201164214 1.77815125038364 1.78532983501077 1.78570438827409
38 47 35 4
1.79239168949825 1.7931284063533 1.79934054945358 1.8004276450948
27 3 34 3
1.80617997398389 1.81291335664286 1.81466808420941 1.81593981127304
33 46 1 1
1.81954393554187 1.82161694416473 1.82607480270083 1.82783903457275
43 2 35 1
1.83250891270624 1.83518975135401 1.83884909073726 1.84182033025302
36 3 50 1
1.84509804001426 1.84835119741198 1.85125834871908 1.85478530741739
45 5 58 1
1.85733249643127 1.86112548544841 1.86332286012046 1.86737443472541
71 1 69 3
1.8677620246502 1.86923171973098 1.86992162373929 1.87353474343023
1 86 1 5
1.8750612633917 1.87676104826959 1.87960889114242 1.88081359228079
99 1 4 110
1.88559925483161 1.88649072517248 1.89085553057493 1.89150811444213
4 119 1 7
1.89209460269048 1.89665587698653 1.89701583927975 1.89733765810285
151 1 2 4
1.89762709129044 1.90308998699194 1.90848501887865 1.90876711988363
156 172 203 1
1.90908035068113 1.90943016502296 1.91381385238372 1.91437099740163
1 1 194 11
1.91498921029165 1.91907809237607 1.91990348600159 1.92081875395238
1 261 14 1
1.92183942300478 1.92427928606188 1.9253663817031 1.92941892571429
1 264 11 273
1.9307614135898 1.93449845124357 1.93609024709487 1.93785209325116
10 430 4 1
1.93951925261862 1.94135448708723 1.94200805302231 1.94448267215017
309 7 1 279
1.94655568077303 1.94884747755262 1.94939000664491 1.95169532042545
6 1 207 7
1.95424250943932 1.95677484595472 1.95904139232109 1.96179564732977
160 7 140 4
1.96378782734556 1.96483558293675 1.96675906686132 1.96848294855394
135 1 5 97
1.97003677662256 1.97166640135606 1.9731278535997 1.97651890415048
1 4 55 3
1.97772360528885 1.98131778703225 1.98227123303957 1.98527674317929
55 3 45 1
1.98606422205671 1.98677173426624 1.99075934326509 1.99122607569249
1 23 1 24
1.99514749720559 1.99540424831085 1.99563519459755 2
1 1 20 8
2.00432137378264 2.00454762775072 2.00860017176192 2.0090481289774
9 2 9 1
2.01283722470517 2.01350247040365 2.01703333929878 2.02118929906994
5 1 5 2
2.02530586526477 2.02938377768521 2.03342375548695 2.03742649794062
2 9 2 4
2.03930973400993 2.04139268515822 2.04921802267018 2.05690485133647
1 1 1 3
2.06069784035361 2.06445798922692 2.07188200730613 2.07554696139253
2 4 2 2
2.08278537031645 2.08635983067475 2.0899051114394 2.09691001300806
1 2 2 1
2.10380372095596 2.10720996964787 2.11021547978759 2.11727129565576
1 2 1 1
Code
#analaysis with log transformed
CIS_preF_factor_fix <- lm (CIS_Totalmean~ Age + Weight + Height + as.factor (Intervention_months_factor),
data = data_scale2,
subset = data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 0 )
qqnorm (resid (CIS_preF_factor_fix), col = "grey" )
qqline (resid (CIS_preF_factor_fix), col = "dodgerblue" , lwd = 2 )
Code
shapiro.test (resid (CIS_preF_factor_fix))
Shapiro-Wilk normality test
data: resid(CIS_preF_factor_fix)
W = 0.95049, p-value < 2.2e-16
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see chunk below.
#multicollinearity
car:: vif (CIS_preF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.066081 1 1.032512
Weight 1.204323 1 1.097417
Height 1.137514 1 1.066543
Intervention_months_factor 1.003466 4 1.000433
Postmenopausal females: *
Code
### LINEARITY (Linearity and constant variance assumptions)
#Fitted versus Residuals Plot: Linearity ok, constant variance looks ok(heteroscedasticity)
plot (CIS_postF, which = 1 )
Code
### HOMOGEINITY OF VARIANCE
# Display scale-location plot, also known as the spread-location plot
plot (CIS_postF, which = 3 ) # constant variance ok
Code
# formal test in the form of Breusch-Pagan
bptest (CIS_postF) # we fail to reject the null of homoscedasticity. The constant variance assumption is not violated.
studentized Breusch-Pagan test
data: CIS_postF
BP = 276.24, df = 7, p-value < 2.2e-16
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot (CIS_postF, 2 ) #suspect
Code
# formal test in the form of Shapiro-Wilk
shapiro.test (resid (CIS_postF)) #fail to reject for the data sampled from normal
Shapiro-Wilk normality test
data: resid(CIS_postF)
W = 0.9598, p-value < 2.2e-16
Code
### OUTLIERS
#plot 3largest
plot (CIS_postF, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (CIS_postF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 13
.rownames CIS_T…¹ Age[,1] Weigh…² Heigh…³ Inter…⁴ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 1201 26 3.98 -12.2 -21.8 1 72.3 -46.3 0.0103 23.9
2 4404 91 9.98 -16.2 -59.8 0 67.3 23.7 0.0533 24.0
3 6399 24.2 29.0 -17.2 -6.76 3 81.5 -57.3 0.0145 23.9
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹CIS_Totalmean, ²Weight[,1], ³Height[,1],
# ⁴Intervention_months_factor
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = CIS_Totalmean), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 4 ) # some influential data points
# A tibble: 0 × 13
# … with 13 variables: .rownames <chr>, CIS_Totalmean <dbl>, Age <dbl[,1]>,
# Weight <dbl[,1]>, Height <dbl[,1]>, Intervention_months_factor <fct>,
# .fitted <dbl>, .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
# .std.resid <dbl>, index <int>
Code
#Cook's distance
cooksd = cooks.distance (CIS_postF)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations
#exceed this threshold
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
#analaysis with log transformed
CIS_postF_factor_fix <- lm (CIS_Totalmean~ Age + Weight + Height + as.factor (Intervention_months_factor),
data = data_scale2,
subset = data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 1 )
qqnorm (resid (CIS_postF_factor_fix), col = "grey" )
qqline (resid (CIS_postF_factor_fix), col = "dodgerblue" , lwd = 2 )
Code
shapiro.test (resid (CIS_postF_factor_fix))
Shapiro-Wilk normality test
data: resid(CIS_postF_factor_fix)
W = 0.95193, p-value < 2.2e-16
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see chunk below.
#multicollinearity
car:: vif (CIS_postF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.035449 1 1.017570
Weight 1.149008 1 1.071918
Height 1.173224 1 1.083155
Intervention_months_factor 1.012342 4 1.001534
Males:
Code
### LINEARITY (Linearity and constant variance assumptions)
#Fitted versus Residuals Plot: Good
plot (CIS_M, which = 1 ) # Linearity ok, constant variance looks ok(heteroscedasticity)
Code
### HOMOGEINITY OF VARIANCE
# Display scale-location plot, also known as the spread-location plot
plot (CIS_M, which = 3 ) # Linearity ok, constant variance looks ok(heteroscedasticity)
Code
# formal test in the form of Breusch-Pagan
bptest (CIS_M)# small p-value, so we reject the null of homoscedasticity. The constant variance assumption is violated.
studentized Breusch-Pagan test
data: CIS_M
BP = 925.08, df = 7, p-value < 2.2e-16
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot (CIS_M, 2 ) # suspect
Code
# formal test in the form of Shapiro-Wilk
shapiro.test (resid (CIS_M)) #reject for the data sampled from normal
Shapiro-Wilk normality test
data: resid(CIS_M)
W = 0.95889, p-value < 2.2e-16
Code
### OUTLIERS
#plot 3largest
plot (CIS_M, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (CIS_M) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 13
.rowna…¹ CIS_T…² Age[,1] Weigh…³ Heigh…⁴ Inter…⁵ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 1006 128 26.0 35.8 3.24 1 67.6 60.4 0.00424 22.5
2 2510 26.3 15.0 19.8 6.24 3 84.2 -57.9 0.00432 22.5
3 5998 38 3.98 71.8 3.24 1 71.1 -33.1 0.0122 22.5
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹.rownames, ²CIS_Totalmean, ³Weight[,1],
# ⁴Height[,1], ⁵Intervention_months_factor
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = CIS_Totalmean), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 4 ) # some influential data points
# A tibble: 0 × 13
# … with 13 variables: .rownames <chr>, CIS_Totalmean <dbl>, Age <dbl[,1]>,
# Weight <dbl[,1]>, Height <dbl[,1]>, Intervention_months_factor <fct>,
# .fitted <dbl>, .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
# .std.resid <dbl>, index <int>
Code
#Cook's distance
cooksd = cooks.distance (CIS_M)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations
#exceed this threshold
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
#analaysis with log transformed
CISMfactor_fix <- lm (CIS_Totalmean~ Age + Weight + Height + as.factor (Intervention_months_factor),
data = data_scale2,
subset = data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 1 )
qqnorm (resid (CISMfactor_fix), col = "grey" )
qqline (resid (CISMfactor_fix), col = "dodgerblue" , lwd = 2 )
Code
shapiro.test (resid (CISMfactor_fix))
Shapiro-Wilk normality test
data: resid(CISMfactor_fix)
W = 0.95193, p-value < 2.2e-16
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see chunk below.
#multicollinearity
car:: vif (CIS_M) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.092206 1 1.045087
Weight 1.272488 1 1.128046
Height 1.251062 1 1.118509
Intervention_months_factor 1.007520 4 1.000937
::: {.callout-note appearance=“default”} ### Decision
We conclude that the assumptions were violated and thus we use robust models. :::
Robust models
Code
CIS_preF_robust <- lmrob (CIS_Totalmean ~ Age + Weight + Height + as.factor (Intervention_months_factor) , data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 0 )
CIS_postF_robust <- lmrob (CIS_Totalmean ~ Age + Weight + Height + as.factor (Intervention_months_factor) , data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 1 )
CIS_M_robust <- lmrob (CIS_Totalmean ~ Age + Weight + Height + as.factor (Intervention_months_factor) , data = data_scale, subset = data_scale$ Gender == "Male" )
pl <- c (
` (Intercept) ` = "Intercept" ,
` as.factor(Intervention_months_factor)1 ` = "6-12 months" ,
` as.factor(Intervention_months_factor)2 ` = "12-18 months" ,
` as.factor(Intervention_months_factor)3 ` = "18-24 months" ,
` as.factor(Intervention_months_factor)4 ` = "24+ months"
)
tab_model (CIS_preF_robust, CIS_postF_robust, CIS_M_robust, pred.labels = pl, dv.labels = c ("Pre-menopausal women" , "Post-menopausal women" , "Men" ), title = "Fatigue, higher score is worse (robust regression)" , show.reflvl = T, digits = 3 )
Fatigue, higher score is worse (robust regression)
Pre-menopausal women
Post-menopausal women
Men
Predictors
Estimates
CI
p
Estimates
CI
p
Estimates
CI
p
Intercept
65.425
62.344 – 68.505
<0.001
57.612
53.198 – 62.027
<0.001
62.116
59.324 – 64.908
<0.001
Age
0.159
0.028 – 0.290
0.017
-0.133
-0.373 – 0.108
0.278
-0.044
-0.122 – 0.035
0.274
Weight
0.074
-0.008 – 0.156
0.078
0.215
0.083 – 0.348
0.001
0.048
-0.032 – 0.128
0.243
Height
-0.142
-0.293 – 0.009
0.065
-0.269
-0.523 – -0.014
0.038
-0.163
-0.309 – -0.016
0.029
6-12 months
7.769
4.777 – 10.762
<0.001
15.888
10.393 – 21.383
<0.001
12.500
8.955 – 16.046
<0.001
12-18 months
-3.724
-7.530 – 0.082
0.055
-2.147
-7.304 – 3.011
0.414
2.222
-2.036 – 6.480
0.306
18-24 months
18.308
16.092 – 20.525
<0.001
29.788
26.485 – 33.091
<0.001
23.248
20.459 – 26.038
<0.001
24+ months
18.288
16.115 – 20.462
<0.001
29.068
25.947 – 32.189
<0.001
24.056
21.316 – 26.796
<0.001
Observations
2360
1471
3558
R2 / R2 adjusted
0.173 / 0.171
0.199 / 0.195
0.175 / 0.173